# Importing Main Dataset
#Data File: CleanDNAprepData1.18.19
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(emmeans)
library(multcompView)
# Read Data
# Tab-delimited file with a header row; fill = TRUE lets read.table() accept
# rows that have fewer fields than the header.
AllData <- read.table(
  file = "CleanDNAprepData1.18.19.txt",
  header = TRUE,
  sep = "\t",
  fill = TRUE
)

# Keep every row whose VariableSampleType is neither "Standard" nor
# "NP40InoculatedMilk".
SampleData <- AllData %>%
  filter(
    VariableSampleType != "Standard" &
      VariableSampleType != "NP40InoculatedMilk"
  )
dim(SampleData)
## [1] 1440 42
# Save the filtered sample table as tab-delimited text.
write.table(SampleData, file = "SampleData.txt", sep = "\t")
#Getting All variables and all levels within each
#sapply(SampleData, levels)
#SampleData %>%
# sapply(levels)
# Summary Statistics
# For each Assay x VariableKit x VariableSampleType combination: mean and
# standard deviation of LogCopiespermLofMilk (NAs excluded), the number of NA
# values, and the total number of rows.
SampleData.summary <- SampleData %>%
  group_by(Assay, VariableKit, VariableSampleType) %>%
  summarize(
    mean_CopyN_permLMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n(),
    # Same result grouping as the default (only the last grouping variable is
    # dropped), stated explicitly so summarize() emits no regrouping message.
    .groups = "drop_last"
  )
## `summarise()` regrouping output by 'Assay', 'VariableKit' (override with `.groups` argument)
# Save the per-assay/kit/sample-type summary as tab-delimited text.
write.table(SampleData.summary, file = "SampleData.summary.txt", sep = "\t")

# For each Assay x SpikeSet x VariableKit combination: mean and standard
# deviation of LogCopiespermLofMilk (NAs excluded), the number of NA values,
# and the total number of rows, returned as a plain data frame.
SampleData.summary.by.replicate <- SampleData %>%
  group_by(Assay, SpikeSet, VariableKit) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n(),
    # Explicit form of the default regrouping; silences the summarize() message.
    .groups = "drop_last"
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'Assay', 'SpikeSet' (override with `.groups` argument)
# Save the per-assay/spike-set/kit summary as tab-delimited text.
write.table(
  SampleData.summary.by.replicate,
  file = "SampleData.summary.by.replicate.txt",
  sep = "\t"
)
# Bovine
# Filter Subset from Sample Data
# Keep only the rows whose Assay is "Bovine DNA".
Bovine <- filter(SampleData, Assay == "Bovine DNA")
dim(Bovine)
## [1] 240 42
# Summary Statistics
# For each VariableKit x VariableSampleType combination of the bovine assay:
# mean and standard deviation of LogCopiespermLofMilk (NAs excluded), the
# number of NA values, and the total number of rows.
Bovine.summary <- Bovine %>%
  group_by(VariableKit, VariableSampleType) %>%
  summarize(
    mean_CopyN_permLMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n(),
    # Same result grouping as the default (still grouped by VariableKit),
    # stated explicitly so summarize() emits no regrouping message.
    .groups = "drop_last"
  )
## `summarise()` regrouping output by 'VariableKit' (override with `.groups` argument)
# Save the bovine summary as tab-delimited text.
write.table(Bovine.summary, file = "Bovine.summary.txt", sep = "\t")

# Boxplots of the raw log copy numbers by sample type, coloured by kit.
# One fixed colour per VariableKit level. Pfood uses "#D2326B", the value used
# by every later Colors definition in this script, so that a kit keeps the
# same colour in all figures.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
ggplot(
  data = Bovine,
  # The former `z = VariableKit` mapping was dropped: geom_boxplot() does not
  # use a z aesthetic, and colour already splits the boxes by kit.
  mapping = aes(
    x = VariableSampleType,
    y = LogCopiespermLofMilk,
    color = VariableKit
  )
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_boxplot(lwd = 1) +
  theme_bw() +
  ggtitle("Bovine DNA Copy Numbers") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  # NOTE(review): "NP40InoculatedMilk" rows are filtered out when SampleData
  # is built, so this axis position is presumably left empty - confirm that
  # is intended.
  scale_x_discrete(
    limits = c(
      "UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk",
      "MockCommunity", "NoTemplateControl"
    )
  )
## Warning: Removed 80 rows containing non-finite values (stat_boxplot).
# Jittered scatter of the raw log copy numbers by sample type; colour encodes
# VariableKit and point shape encodes SpikeSet.
ggplot(
  data = Bovine,
  mapping = aes(
    x = VariableSampleType,
    y = LogCopiespermLofMilk,
    color = VariableKit,
    shape = SpikeSet
  )
) +
  geom_jitter(width = 0.25) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c(
      "UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk",
      "MockCommunity", "NoTemplateControl"
    )
  ) +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Bovine DNA Copy Numbers") +
  theme_bw() +
  # Strip grid, background and border; keep plain axis lines and tilt x labels.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
## Warning: Removed 80 rows containing missing values (geom_point).
# Inoculated Milk Data
# Inoculated-milk rows with a log copy number above 0.001. Rows where the
# value is NA fail the comparison and are therefore dropped as well.
Bovine.InnOnly <- Bovine %>%
  filter(
    VariableSampleType == "InoculatedMilk" &
      LogCopiespermLofMilk > 0.001
  )
dim(Bovine.InnOnly)
## [1] 120 42
# Print per-kit mean, standard deviation, NA count and row count of the
# inoculated-milk log copy numbers.
Bovine.InnOnly %>%
  group_by(VariableKit) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n(),
    # Explicit form of the default ungrouping; silences the summarize() message.
    .groups = "drop_last"
  ) %>%
  data.frame()
## `summarise()` ungrouping output (override with `.groups` argument)
## VariableKit mean_LogCopiespermLofMilk st_dev n_missing n_total
## 1 COREDNA 5.447426 0.08176974 0 18
## 2 EZFood 4.707684 0.59361196 0 12
## 3 Mastitis 5.365196 0.05349710 0 18
## 4 Pfood 4.863607 0.07949570 0 18
## 5 PSoilP 4.050068 0.23971891 0 18
## 6 PviralDNA 5.199113 0.10942986 0 18
## 7 ZymoDNA 4.810397 0.06312697 0 18
####
# Per-kit mean, standard deviation, NA count and row count of the
# inoculated-milk log copy numbers, stored as a plain data frame.
Bovine.InnOnly.Summary <- Bovine.InnOnly %>%
  group_by(VariableKit) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n(),
    # Explicit form of the default ungrouping; silences the summarize() message.
    .groups = "drop_last"
  ) %>%
  data.frame()
## `summarise()` ungrouping output (override with `.groups` argument)
# Save the per-kit summary and the inoculated-milk rows as tab-delimited text.
write.table(Bovine.InnOnly.Summary, file = "BovineInnOnly.summary.txt", sep = "\t")
write.table(Bovine.InnOnly, file = "BovineInnOnly.txt", sep = "\t")

# Print mean, standard deviation, NA count and row count of the
# inoculated-milk log copy numbers for each SpikeSet x VariableKit combination.
Bovine.InnOnly %>%
  group_by(SpikeSet, VariableKit) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n(),
    # Explicit form of the default regrouping; silences the summarize() message.
    .groups = "drop_last"
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'SpikeSet' (override with `.groups` argument)
## SpikeSet VariableKit mean_LogCopiespermLofMilk st_dev n_missing n_total
## 1 First COREDNA 5.510743 0.05437988 0 6
## 2 First EZFood 5.023606 0.07381508 0 6
## 3 First Mastitis 5.337045 0.05887863 0 6
## 4 First Pfood 4.875976 0.05016906 0 6
## 5 First PSoilP 3.832946 0.11231042 0 6
## 6 First PviralDNA 5.226245 0.05143839 0 6
## 7 First ZymoDNA 4.830652 0.05446231 0 6
## 8 Second COREDNA 5.396342 0.07460889 0 6
## 9 Second Mastitis 5.394293 0.02064839 0 6
## 10 Second Pfood 4.775180 0.03994283 0 6
## 11 Second PSoilP 4.084926 0.19020018 0 6
## 12 Second PviralDNA 5.085082 0.08519742 0 6
## 13 Second ZymoDNA 4.785882 0.03169048 0 6
## 14 Third COREDNA 5.435192 0.07802399 0 6
## 15 Third EZFood 4.391762 0.72817640 0 6
## 16 Third Mastitis 5.364249 0.06220550 0 6
## 17 Third Pfood 4.939664 0.02942455 0 6
## 18 Third PSoilP 4.232332 0.22067912 0 6
## 19 Third PviralDNA 5.286011 0.07253691 0 6
## 20 Third ZymoDNA 4.814657 0.09134190 0 6
# Jittered scatter of the inoculated-milk log copy numbers per kit; colour
# encodes VariableKit and point shape encodes SpikeSet.
ggplot(
  data = Bovine.InnOnly,
  mapping = aes(
    x = VariableKit,
    y = LogCopiespermLofMilk,
    color = VariableKit,
    shape = SpikeSet
  )
) +
  geom_jitter(width = 0.25) +
  scale_color_manual(values = Colors) +
  # Fixed kit order along the x axis.
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA",
      "ZymoDNA"
    )
  ) +
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Bovine DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
# Inoculated-milk scatter drawn with hollow point shapes: shape encodes
# SpikeSet (as a factor) and colour encodes VariableKit.
ggplot(
  Bovine.InnOnly,
  aes(VariableKit, LogCopiespermLofMilk, shape = factor(SpikeSet))
) +
  # FALSE spelled out: `F` is an ordinary variable that can be reassigned.
  scale_shape_discrete(solid = FALSE) +
  geom_jitter(aes(colour = VariableKit), size = 2, stroke = 1, width = 0.5) +
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Bovine DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA",
      "ZymoDNA"
    )
  )
# Model Selection
# Three linear models were compared, each containing VariableKit plus either
# SpikeSet only, qPCRefficiency only, or both as covariates. The best-fitting
# model was used as the final model.
# Model 1 - VariableKit + SpikeSet
m_Bovine.LogCopiespermLofMilk1 <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = Bovine.InnOnly
)
summary(m_Bovine.LogCopiespermLofMilk1)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = Bovine.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.13707 -0.06085 0.00067 0.06704 0.54510
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.47765 0.05874 93.255 < 2e-16 ***
## VariableKitEZFood -0.75758 0.08288 -9.141 3.39e-15 ***
## VariableKitMastitis -0.08223 0.07292 -1.128 0.261910
## VariableKitPfood -0.58382 0.07292 -8.006 1.27e-12 ***
## VariableKitPSoilP -1.39736 0.07292 -19.162 < 2e-16 ***
## VariableKitPviralDNA -0.24831 0.07292 -3.405 0.000921 ***
## VariableKitZymoDNA -0.63703 0.07292 -8.736 2.86e-14 ***
## SpikeSetSecond -0.06591 0.05064 -1.302 0.195747
## SpikeSetThird -0.02476 0.04774 -0.519 0.604983
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2188 on 111 degrees of freedom
## Multiple R-squared: 0.8217, Adjusted R-squared: 0.8088
## F-statistic: 63.94 on 8 and 111 DF, p-value: < 2.2e-16
# Model 2 - VariableKit + qPCRefficiency
m_Bovine.LogCopiespermLofMilk2 <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency,
  data = Bovine.InnOnly
)
summary(m_Bovine.LogCopiespermLofMilk2)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency,
## data = Bovine.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.14589 -0.06992 0.00801 0.06122 0.53629
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.32738 0.82569 6.452 2.91e-09 ***
## VariableKitEZFood -0.73978 0.08178 -9.046 5.24e-15 ***
## VariableKitMastitis -0.08275 0.07323 -1.130 0.260914
## VariableKitPfood -0.58434 0.07323 -7.979 1.39e-12 ***
## VariableKitPSoilP -1.39788 0.07323 -19.088 < 2e-16 ***
## VariableKitPviralDNA -0.24831 0.07315 -3.395 0.000951 ***
## VariableKitZymoDNA -0.63755 0.07323 -8.706 3.15e-14 ***
## qPCRefficiency 0.13332 0.91514 0.146 0.884437
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2194 on 112 degrees of freedom
## Multiple R-squared: 0.819, Adjusted R-squared: 0.8077
## F-statistic: 72.39 on 7 and 112 DF, p-value: < 2.2e-16
# Model 3 - VariableKit + qPCRefficiency + SpikeSet
m_Bovine.LogCopiespermLofMilk3 <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Bovine.InnOnly
)
summary(m_Bovine.LogCopiespermLofMilk3)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency +
## SpikeSet, data = Bovine.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.14165 -0.06039 0.00318 0.06822 0.54053
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.00863 7.32053 1.094 0.276347
## VariableKitEZFood -0.74512 0.09068 -8.217 4.48e-13 ***
## VariableKitMastitis -0.07155 0.07946 -0.900 0.369853
## VariableKitPfood -0.57314 0.07946 -7.213 7.39e-11 ***
## VariableKitPSoilP -1.38668 0.07946 -17.451 < 2e-16 ***
## VariableKitPviralDNA -0.24831 0.07321 -3.392 0.000966 ***
## VariableKitZymoDNA -0.62635 0.07946 -7.882 2.51e-12 ***
## qPCRefficiency -2.74709 7.94534 -0.346 0.730192
## SpikeSetSecond -0.09991 0.11070 -0.903 0.368756
## SpikeSetThird -0.16258 0.40148 -0.405 0.686296
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2196 on 110 degrees of freedom
## Multiple R-squared: 0.8219, Adjusted R-squared: 0.8073
## F-statistic: 56.4 on 9 and 110 DF, p-value: < 2.2e-16
anova(m_Bovine.LogCopiespermLofMilk1, m_Bovine.LogCopiespermLofMilk2)
## Analysis of Variance Table
##
## Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
## Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 111 5.3125
## 2 112 5.3931 -1 -0.080615 1.6844 0.197
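# The model with qPCRefficiency does not have a better fit than the model with SpikeSet (p = 0.197).
# Note: these two models are not nested (neither is a special case of the other), so this F-test is only a rough guide; the AIC values reported below are the more appropriate comparison.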
anova(m_Bovine.LogCopiespermLofMilk2, m_Bovine.LogCopiespermLofMilk3)
## Analysis of Variance Table
##
## Model 1: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency
## Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 112 5.3931
## 2 110 5.3067 2 0.086382 0.8953 0.4114
# Model with both qPCRefficiency and SpikeSet does not have better fit than model with qPCRefficiency only
anova(m_Bovine.LogCopiespermLofMilk1, m_Bovine.LogCopiespermLofMilk3)
## Analysis of Variance Table
##
## Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
## Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 111 5.3125
## 2 110 5.3067 1 0.005767 0.1195 0.7302
# Model with both qPCRefficiency and SpikeSet does not have better fit than model with SpikeSet only
# Fit of model with both qPCRefficiency and SpikeSet is not different from fit of model with SpikeSet only
anova(m_Bovine.LogCopiespermLofMilk1, m_Bovine.LogCopiespermLofMilk2, m_Bovine.LogCopiespermLofMilk3)
## Analysis of Variance Table
##
## Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
## Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency
## Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 111 5.3125
## 2 112 5.3931 -1 -0.080615 1.6710 0.1988
## 3 110 5.3067 2 0.086382 0.8953 0.4114
# AIC of each candidate linear model.
# Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
AIC(m_Bovine.LogCopiespermLofMilk1)
## [1] -13.54697
# Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency
AIC(m_Bovine.LogCopiespermLofMilk2)
## [1] -13.73969
# Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
AIC(m_Bovine.LogCopiespermLofMilk3)
## [1] -11.67731
# Final model chosen:
# Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
# Fit Linear Model
# Refit under the name used by all the diagnostics and emmeans calls below.
m_Bovine.LogCopiespermLofMilk <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = Bovine.InnOnly
)
summary(m_Bovine.LogCopiespermLofMilk)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = Bovine.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.13707 -0.06085 0.00067 0.06704 0.54510
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.47765 0.05874 93.255 < 2e-16 ***
## VariableKitEZFood -0.75758 0.08288 -9.141 3.39e-15 ***
## VariableKitMastitis -0.08223 0.07292 -1.128 0.261910
## VariableKitPfood -0.58382 0.07292 -8.006 1.27e-12 ***
## VariableKitPSoilP -1.39736 0.07292 -19.162 < 2e-16 ***
## VariableKitPviralDNA -0.24831 0.07292 -3.405 0.000921 ***
## VariableKitZymoDNA -0.63703 0.07292 -8.736 2.86e-14 ***
## SpikeSetSecond -0.06591 0.05064 -1.302 0.195747
## SpikeSetThird -0.02476 0.04774 -0.519 0.604983
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2188 on 111 degrees of freedom
## Multiple R-squared: 0.8217, Adjusted R-squared: 0.8088
## F-statistic: 63.94 on 8 and 111 DF, p-value: < 2.2e-16
# Model Fit Plots
# Quick base-graphics view of residuals against predicted values.
plot(
  x = predict(m_Bovine.LogCopiespermLofMilk),
  y = resid(m_Bovine.LogCopiespermLofMilk)
)

# using ggplot2
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
# Build the diagnostic data explicitly instead of handing the lm object to
# ggplot() and relying on its implicit conversion. model.frame() returns the
# rows actually used in the fit, so VariableKit lines up with the residuals.
data.frame(
  predicted = predict(m_Bovine.LogCopiespermLofMilk),
  residual = resid(m_Bovine.LogCopiespermLofMilk),
  VariableKit = model.frame(m_Bovine.LogCopiespermLofMilk)$VariableKit
) %>%
  ggplot(aes(x = predicted, y = residual, color = VariableKit)) +
  geom_point() +
  labs(x = "Predicted", y = "Residual") +
  theme_bw() +
  # Title spelling aligned with the other figure titles ("Inoculated").
  ggtitle("Bovine Inoculated Only - Model Fit - Residuals vs Predicted") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  # Reference lines at residual = +1 and -1.
  geom_hline(yintercept = c(-1, 1))
# qqplots
# Normal Q-Q plot of the final lm's residuals with its reference line; the
# residuals are extracted once inside local() so no extra object is left behind.
local({
  lm_residuals <- resid(m_Bovine.LogCopiespermLofMilk)
  qqnorm(lm_residuals)
  qqline(lm_residuals)
})
# Coefficient table of the final model, printed again for reference.
summary(m_Bovine.LogCopiespermLofMilk)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = Bovine.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.13707 -0.06085 0.00067 0.06704 0.54510
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.47765 0.05874 93.255 < 2e-16 ***
## VariableKitEZFood -0.75758 0.08288 -9.141 3.39e-15 ***
## VariableKitMastitis -0.08223 0.07292 -1.128 0.261910
## VariableKitPfood -0.58382 0.07292 -8.006 1.27e-12 ***
## VariableKitPSoilP -1.39736 0.07292 -19.162 < 2e-16 ***
## VariableKitPviralDNA -0.24831 0.07292 -3.405 0.000921 ***
## VariableKitZymoDNA -0.63703 0.07292 -8.736 2.86e-14 ***
## SpikeSetSecond -0.06591 0.05064 -1.302 0.195747
## SpikeSetThird -0.02476 0.04774 -0.519 0.604983
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2188 on 111 degrees of freedom
## Multiple R-squared: 0.8217, Adjusted R-squared: 0.8088
## F-statistic: 63.94 on 8 and 111 DF, p-value: < 2.2e-16
# Almost all residuals are smaller than 1 in absolute value.
# Exceptions are:
# Store the lm residuals as a column, then count, per kit, the rows whose
# absolute residual exceeds 1.
Bovine.InnOnly$resid <- resid(m_Bovine.LogCopiespermLofMilk)
Bovine.InnOnly %>%
  filter(abs(resid) > 1) %>%
  # count() is the one-step equivalent of group_by() + summarize(n = n()).
  count(VariableKit)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 1 x 2
## VariableKit n
## <chr> <int>
## 1 EZFood 2
library(emmeans)
# Check Tukey-adjusted pairwise comparison of kit estimates
# The `pairwise ~ VariableKit` spec yields both the estimated marginal means
# ($emmeans) and their pairwise contrasts.
m_Bovine.LogCopiespermLofMilk_emmeans <- emmeans(
  m_Bovine.LogCopiespermLofMilk,
  pairwise ~ VariableKit
)
# Use compact letter display for convenience
# NOTE(review): emmeans reports CLD() as deprecated and points to
# multcomp::cld() or pwpp() instead.
m_Bovine.LogCopiespermLofMilk_cld <- CLD(
  m_Bovine.LogCopiespermLofMilk_emmeans$emmeans,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
m_Bovine.LogCopiespermLofMilk_cld
## VariableKit emmean SE df lower.CL upper.CL .group
## PSoilP 4.05 0.0516 111 3.95 4.15 A
## EZFood 4.69 0.0649 111 4.56 4.82 B
## ZymoDNA 4.81 0.0516 111 4.71 4.91 B
## Pfood 4.86 0.0516 111 4.76 4.97 B
## PviralDNA 5.20 0.0516 111 5.10 5.30 C
## Mastitis 5.37 0.0516 111 5.26 5.47 CD
## COREDNA 5.45 0.0516 111 5.35 5.55 D
##
## Results are averaged over the levels of: SpikeSet
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 7 estimates
## significance level used: alpha = 0.05
# Letter display again, this time sorted and with the pairwise comparison
# table included (details = TRUE).
m_Bovine.LogCopiespermLofMilk_cld_detail <- CLD(
  m_Bovine.LogCopiespermLofMilk_emmeans$emmeans,
  sort = TRUE,
  details = TRUE,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
m_Bovine.LogCopiespermLofMilk_cld_detail
## $emmeans
## VariableKit emmean SE df lower.CL upper.CL .group
## PSoilP 4.05 0.0516 111 3.95 4.15 A
## EZFood 4.69 0.0649 111 4.56 4.82 B
## ZymoDNA 4.81 0.0516 111 4.71 4.91 B
## Pfood 4.86 0.0516 111 4.76 4.97 B
## PviralDNA 5.20 0.0516 111 5.10 5.30 C
## Mastitis 5.37 0.0516 111 5.26 5.47 CD
## COREDNA 5.45 0.0516 111 5.35 5.55 D
##
## Results are averaged over the levels of: SpikeSet
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 7 estimates
## significance level used: alpha = 0.05
##
## $comparisons
## contrast estimate SE df t.ratio p.value
## EZFood - PSoilP 0.6398 0.0829 111 7.719 <.0001
## ZymoDNA - PSoilP 0.7603 0.0729 111 10.426 <.0001
## ZymoDNA - EZFood 0.1206 0.0829 111 1.455 0.7707
## Pfood - PSoilP 0.8135 0.0729 111 11.156 <.0001
## Pfood - EZFood 0.1738 0.0829 111 2.097 0.3620
## Pfood - ZymoDNA 0.0532 0.0729 111 0.730 0.9905
## PviralDNA - PSoilP 1.1490 0.0729 111 15.757 <.0001
## PviralDNA - EZFood 0.5093 0.0829 111 6.145 <.0001
## PviralDNA - ZymoDNA 0.3887 0.0729 111 5.330 <.0001
## PviralDNA - Pfood 0.3355 0.0729 111 4.601 0.0002
## Mastitis - PSoilP 1.3151 0.0729 111 18.034 <.0001
## Mastitis - EZFood 0.6754 0.0829 111 8.149 <.0001
## Mastitis - ZymoDNA 0.5548 0.0729 111 7.608 <.0001
## Mastitis - Pfood 0.5016 0.0729 111 6.878 <.0001
## Mastitis - PviralDNA 0.1661 0.0729 111 2.278 0.2643
## COREDNA - PSoilP 1.3974 0.0729 111 19.162 <.0001
## COREDNA - EZFood 0.7576 0.0829 111 9.141 <.0001
## COREDNA - ZymoDNA 0.6370 0.0729 111 8.736 <.0001
## COREDNA - Pfood 0.5838 0.0729 111 8.006 <.0001
## COREDNA - PviralDNA 0.2483 0.0729 111 3.405 0.0156
## COREDNA - Mastitis 0.0822 0.0729 111 1.128 0.9183
##
## Results are averaged over the levels of: SpikeSet
## P value adjustment: tukey method for comparing a family of 7 estimates
# Get fitted values from model to plot with other software
# Per-kit estimated marginal means with SE and confidence limits, printed as a
# plain data frame.
data.frame(
  summary(
    emmeans(m_Bovine.LogCopiespermLofMilk, ~ VariableKit)
  )
)
## VariableKit emmean SE df lower.CL upper.CL
## 1 COREDNA 5.447426 0.05156446 111 5.345247 5.549604
## 2 EZFood 4.689842 0.06488385 111 4.561271 4.818414
## 3 Mastitis 5.365196 0.05156446 111 5.263017 5.467374
## 4 Pfood 4.863607 0.05156446 111 4.761428 4.965785
## 5 PSoilP 4.050068 0.05156446 111 3.947890 4.152247
## 6 PviralDNA 5.199113 0.05156446 111 5.096934 5.301291
## 7 ZymoDNA 4.810397 0.05156446 111 4.708218 4.912575
# Get summary
# infer = TRUE adds the t ratio and p-value columns next to the confidence
# limits.
emmeans(m_Bovine.LogCopiespermLofMilk, ~ VariableKit) %>%
  summary(infer = TRUE)
## VariableKit emmean SE df lower.CL upper.CL t.ratio p.value
## COREDNA 5.45 0.0516 111 5.35 5.55 105.643 <.0001
## EZFood 4.69 0.0649 111 4.56 4.82 72.281 <.0001
## Mastitis 5.37 0.0516 111 5.26 5.47 104.048 <.0001
## Pfood 4.86 0.0516 111 4.76 4.97 94.321 <.0001
## PSoilP 4.05 0.0516 111 3.95 4.15 78.544 <.0001
## PviralDNA 5.20 0.0516 111 5.10 5.30 100.827 <.0001
## ZymoDNA 4.81 0.0516 111 4.71 4.91 93.289 <.0001
##
## Results are averaged over the levels of: SpikeSet
## Confidence level used: 0.95
# Plot fitted values from model
# Per-kit estimated marginal means with confidence-limit error bars, labelled
# with the compact-letter-display groups.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
emmeans(m_Bovine.LogCopiespermLofMilk, ~ VariableKit) %>%
  summary() %>%
  data.frame() %>%
  ggplot(aes(x = VariableKit, y = emmean, color = VariableKit)) +
  geom_point() +
  labs(y = "Estimated Marginal Means") +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.5) +
  # Group letters come from the CLD table; y and colour are inherited from
  # the plot-level mapping.
  geom_text(
    data = data.frame(m_Bovine.LogCopiespermLofMilk_cld),
    aes(x = VariableKit, label = `.group`),
    hjust = -.1
  ) +
  ylim(3.5, 8.5) +
  theme_bw() +
  ggtitle("Bovine Copy Numbers - Inoculated Milk Only") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA",
      "ZymoDNA"
    )
  )
### Figure: Bovine InnOnly Raw Data + Final Model Output
# Select the plotting columns by name rather than by position. A positional
# index such as c(25, 42, 4) silently picks different columns whenever the
# input file layout changes or a column is added to Bovine.InnOnly (as the
# `resid` column is, earlier in this script).
# NOTE(review): the original left-to-right order of these three columns could
# not be confirmed; the plots that use this data frame reference the columns
# by name only.
df1_Bovine.rawdata <- Bovine.InnOnly[
  c("LogCopiespermLofMilk", "SpikeSet", "VariableKit")
]
# Per-kit estimated marginal means with confidence limits from the final lm.
df2_Bovine.model <- emmeans(m_Bovine.LogCopiespermLofMilk, ~ VariableKit) %>%
  summary() %>%
  data.frame()
# Draft figure: jittered raw values with the model means and error bars on top.
ggplot() +
  geom_jitter(
    data = df1_Bovine.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk)
  ) +
  geom_point(
    data = df2_Bovine.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_errorbar(
    data = df2_Bovine.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  )
# Making the plot pretty
# Raw values (colour = kit, shape = spike set) overlaid with the model's
# estimated marginal means, their confidence limits and the CLD group letters.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
ggplot() +
  geom_jitter(
    data = df1_Bovine.rawdata,
    aes(
      x = VariableKit,
      y = LogCopiespermLofMilk,
      color = VariableKit,
      shape = SpikeSet
    )
  ) +
  geom_point(
    data = df2_Bovine.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_errorbar(
    data = df2_Bovine.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  ) +
  geom_text(
    data = data.frame(m_Bovine.LogCopiespermLofMilk_cld),
    aes(x = VariableKit, label = `.group`, y = emmean),
    hjust = -.5
  ) +
  ylim(3.5, 6.5) +
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  theme_bw() +
  ggtitle("Bovine Copy Numbers - Inoculated Milk Only") +
  # The colour scale is added exactly once: adding it twice made ggplot2
  # report "Scale for 'colour' is already present" and replace the first one.
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA",
      "ZymoDNA"
    )
  )
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
# from https://cran.r-project.org/web/packages/emmeans/vignettes/FAQs.html#contents
library(nlme)
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
# Same fixed effects as the chosen lm (VariableKit + SpikeSet), refitted with
# gls so that each VariableKit level gets its own residual standard deviation
# (varIdent stratified by VariableKit).
mod.Bovine <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = Bovine.InnOnly,
  weights = varIdent(form = ~1 | VariableKit)
)
summary(mod.Bovine)
## Generalized least squares fit by REML
## Model: LogCopiespermLofMilk ~ VariableKit + SpikeSet
## Data: Bovine.InnOnly
## AIC BIC logLik
## -128.8078 -85.45536 80.40392
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | VariableKit
## Parameter estimates:
## COREDNA EZFood Mastitis Pfood PSoilP PviralDNA ZymoDNA
## 1.0000000 7.6154180 0.9937919 0.6200963 3.0409418 1.0280090 0.8533467
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 5.462007 0.02108712 259.02104 0.0000
## VariableKitEZFood -0.767394 0.17466643 -4.39348 0.0000
## VariableKitMastitis -0.082230 0.02624097 -3.13365 0.0022
## VariableKitPfood -0.583819 0.02190093 -26.65728 0.0000
## VariableKitPSoilP -1.397357 0.05958244 -23.45251 0.0000
## VariableKitPviralDNA -0.248313 0.02669373 -9.30230 0.0000
## VariableKitZymoDNA -0.637029 0.02446865 -26.03449 0.0000
## SpikeSetSecond -0.069885 0.01717688 -4.06858 0.0001
## SpikeSetThird 0.026141 0.01716113 1.52328 0.1305
##
## Correlation:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA SpkStS
## VariableKitEZFood -0.101
## VariableKitMastitis -0.626 0.076
## VariableKitPfood -0.750 0.091 0.603
## VariableKitPSoilP -0.276 0.033 0.222 0.265
## VariableKitPviralDNA -0.615 0.074 0.495 0.593 0.218
## VariableKitZymoDNA -0.671 0.081 0.540 0.646 0.238 0.530
## SpikeSetSecond -0.407 0.025 0.000 0.000 0.000 0.000 0.000
## SpikeSetThird -0.407 0.000 0.000 0.000 0.000 0.000 0.000 0.500
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.47091970 -0.57358512 0.01701375 0.77528943 1.49714889
##
## Residual standard error: 0.07896765
## Degrees of freedom: 120 total; 111 residual
AIC(mod.Bovine)
## [1] -128.8078
# Model forcing qPCRefficiency
# gls with per-kit residual standard deviations and qPCRefficiency added to
# the VariableKit + SpikeSet fixed effects.
mod.Bovine.all <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit + SpikeSet + qPCRefficiency,
  data = Bovine.InnOnly,
  weights = varIdent(form = ~1 | VariableKit)
)
summary(mod.Bovine.all)
## Generalized least squares fit by REML
## Model: LogCopiespermLofMilk ~ VariableKit + SpikeSet + qPCRefficiency
## Data: Bovine.InnOnly
## AIC BIC logLik
## -134.968 -89.05984 84.484
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | VariableKit
## Parameter estimates:
## COREDNA EZFood Mastitis Pfood PSoilP PviralDNA ZymoDNA
## 1.0000000 8.5338537 1.0207949 0.7350816 3.2883480 1.1966519 0.9487773
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 11.116127 2.5679470 4.328799 0.0000
## VariableKitEZFood -0.737765 0.1763197 -4.184246 0.0001
## VariableKitMastitis -0.058375 0.0262665 -2.222422 0.0283
## VariableKitPfood -0.559964 0.0234365 -23.892862 0.0000
## VariableKitPSoilP -1.373503 0.0585627 -23.453530 0.0000
## VariableKitPviralDNA -0.248313 0.0261128 -9.509231 0.0000
## VariableKitZymoDNA -0.613174 0.0254982 -24.047734 0.0000
## SpikeSetSecond -0.142583 0.0388906 -3.666248 0.0004
## SpikeSetThird -0.286015 0.1405229 -2.035361 0.0442
## qPCRefficiency -6.136524 2.7871131 -2.201749 0.0298
##
## Correlation:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA SpkStS
## VariableKitEZFood 0.070
## VariableKitMastitis 0.408 0.090
## VariableKitPfood 0.458 0.100 0.646
## VariableKitPSoilP 0.183 0.040 0.259 0.290
## VariableKitPviralDNA -0.004 0.061 0.409 0.458 0.183
## VariableKitZymoDNA 0.421 0.092 0.594 0.666 0.266 0.421
## SpikeSetSecond -0.899 -0.053 -0.370 -0.415 -0.166 0.000 -0.381
## SpikeSetThird -0.993 -0.070 -0.409 -0.459 -0.184 0.000 -0.422 0.918
## qPCRefficiency -1.000 -0.071 -0.412 -0.462 -0.185 0.000 -0.425 0.897
## SpkStT
## VariableKitEZFood
## VariableKitMastitis
## VariableKitPfood
## VariableKitPSoilP
## VariableKitPviralDNA
## VariableKitZymoDNA
## SpikeSetSecond
## SpikeSetThird
## qPCRefficiency 0.993
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.50453616 -0.57224613 0.02251345 0.79650637 1.52273963
##
## Residual standard error: 0.07104128
## Degrees of freedom: 120 total; 110 residual
AIC(mod.Bovine.all)
## [1] -134.968
# A model with interactions cannot be fitted because of dropout (failure to
# amplify in some samples): gls does not tolerate missing cells in
# interaction terms.
# Testing simpler model
# gls with per-kit residual standard deviations and VariableKit as the only
# fixed effect.
mod3 <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit,
  data = Bovine.InnOnly,
  weights = varIdent(form = ~1 | VariableKit)
)
summary(mod3)
## Generalized least squares fit by REML
## Model: LogCopiespermLofMilk ~ VariableKit
## Data: Bovine.InnOnly
## AIC BIC logLik
## -131.4578 -93.27441 79.72892
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | VariableKit
## Parameter estimates:
## COREDNA EZFood Mastitis Pfood PSoilP PviralDNA ZymoDNA
## 1.0000000 7.2595670 0.6542400 0.9721864 2.9316260 1.3382655 0.7720084
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 5.447426 0.01927334 282.64048 0e+00
## VariableKitEZFood -0.739742 0.17244196 -4.28980 0e+00
## VariableKitMastitis -0.082230 0.02303168 -3.57030 5e-04
## VariableKitPfood -0.583819 0.02688024 -21.71927 0e+00
## VariableKitPSoilP -1.397357 0.05969893 -23.40674 0e+00
## VariableKitPviralDNA -0.248313 0.03219833 -7.71199 0e+00
## VariableKitZymoDNA -0.637029 0.02434854 -26.16292 0e+00
##
## Correlation:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VariableKitEZFood -0.112
## VariableKitMastitis -0.837 0.094
## VariableKitPfood -0.717 0.080 0.600
## VariableKitPSoilP -0.323 0.036 0.270 0.231
## VariableKitPviralDNA -0.599 0.067 0.501 0.429 0.193
## VariableKitZymoDNA -0.792 0.088 0.662 0.568 0.256 0.474
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.5993333 -0.6143524 0.1093653 0.7539501 2.2432653
##
## Residual standard error: 0.08176985
## Degrees of freedom: 120 total; 113 residual
# Compare the three gls fits by AIC and keep the one with the lowest value.
# NOTE(review): all three gls models were fitted by REML (see the
# "Generalized least squares fit by REML" line in each summary) and differ in
# their fixed effects. REML likelihoods, and therefore these AIC values, are
# generally not comparable across models with different fixed effects.
# Consider refitting with method = "ML" for this comparison and confirm that
# the ranking holds.
AIC(mod.Bovine)
## [1] -128.8078
AIC(mod.Bovine.all) # best model
## [1] -134.968
AIC(mod3)
## [1] -131.4578
# The gls model with the lowest AIC above is carried forward as the "best".
mod.Bovine.best <- mod.Bovine.all
AIC (mod.Bovine.best)
## [1] -134.968
# NOTE(review): the lm AIC below is presumably based on the ordinary
# (maximum-likelihood) log-likelihood, so it is not on the same scale as the
# REML-based gls AICs - TODO confirm before comparing them directly.
AIC (m_Bovine.LogCopiespermLofMilk)
## [1] -13.54697
AIC (mod3)
## [1] -131.4578
summary(mod3)
## Generalized least squares fit by REML
## Model: LogCopiespermLofMilk ~ VariableKit
## Data: Bovine.InnOnly
## AIC BIC logLik
## -131.4578 -93.27441 79.72892
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | VariableKit
## Parameter estimates:
## COREDNA EZFood Mastitis Pfood PSoilP PviralDNA ZymoDNA
## 1.0000000 7.2595670 0.6542400 0.9721864 2.9316260 1.3382655 0.7720084
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 5.447426 0.01927334 282.64048 0e+00
## VariableKitEZFood -0.739742 0.17244196 -4.28980 0e+00
## VariableKitMastitis -0.082230 0.02303168 -3.57030 5e-04
## VariableKitPfood -0.583819 0.02688024 -21.71927 0e+00
## VariableKitPSoilP -1.397357 0.05969893 -23.40674 0e+00
## VariableKitPviralDNA -0.248313 0.03219833 -7.71199 0e+00
## VariableKitZymoDNA -0.637029 0.02434854 -26.16292 0e+00
##
## Correlation:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VariableKitEZFood -0.112
## VariableKitMastitis -0.837 0.094
## VariableKitPfood -0.717 0.080 0.600
## VariableKitPSoilP -0.323 0.036 0.270 0.231
## VariableKitPviralDNA -0.599 0.067 0.501 0.429 0.193
## VariableKitZymoDNA -0.792 0.088 0.662 0.568 0.256 0.474
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.5993333 -0.6143524 0.1093653 0.7539501 2.2432653
##
## Residual standard error: 0.08176985
## Degrees of freedom: 120 total; 113 residual
summary(m_Bovine.LogCopiespermLofMilk)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = Bovine.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.13707 -0.06085 0.00067 0.06704 0.54510
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.47765 0.05874 93.255 < 2e-16 ***
## VariableKitEZFood -0.75758 0.08288 -9.141 3.39e-15 ***
## VariableKitMastitis -0.08223 0.07292 -1.128 0.261910
## VariableKitPfood -0.58382 0.07292 -8.006 1.27e-12 ***
## VariableKitPSoilP -1.39736 0.07292 -19.162 < 2e-16 ***
## VariableKitPviralDNA -0.24831 0.07292 -3.405 0.000921 ***
## VariableKitZymoDNA -0.63703 0.07292 -8.736 2.86e-14 ***
## SpikeSetSecond -0.06591 0.05064 -1.302 0.195747
## SpikeSetThird -0.02476 0.04774 -0.519 0.604983
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2188 on 111 degrees of freedom
## Multiple R-squared: 0.8217, Adjusted R-squared: 0.8088
## F-statistic: 63.94 on 8 and 111 DF, p-value: < 2.2e-16
#model not assuming homoscedasticity and including VariableKit + SpikeSet + qPCRefficiency is a much better fit than any of the alternatives
# Normal Q-Q plots of the residuals, one per model: first the ordinary lm
# fit, then the model stored as mod.Bovine.best.
invisible(lapply(
  list(m_Bovine.LogCopiespermLofMilk, mod.Bovine.best),
  function(fit) {
    qqnorm(resid(fit))
    qqline(resid(fit))
  }
))
library(emmeans)
# Estimated marginal means per kit together with all pairwise contrasts
# (the printed table reports a Tukey adjustment).
mod.Bovine.best_emmeans <- emmeans(
  object = mod.Bovine.best,
  specs = pairwise ~ VariableKit,
  mode = "df.error"
)
# Compact letter display of the kit means, sorted, with contrast details.
# NOTE(review): CLD() is flagged as deprecated by emmeans (see warning below).
mod.Bovine.best_cld <- CLD(
  mod.Bovine.best_emmeans$emmeans,
  sort = TRUE,
  details = TRUE,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
# Letters-only table; this is the object the figures use for their labels.
mod.Bovine.best_cld_letters <- CLD(
  mod.Bovine.best_emmeans$emmeans,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Bovine.best_cld_letters
## VariableKit emmean SE df lower.CL upper.CL .group
## PSoilP 4.06 0.0552 103 3.95 4.17 A
## EZFood 4.70 0.1752 103 4.35 5.04 BC
## ZymoDNA 4.82 0.0164 103 4.79 4.85 B
## Pfood 4.87 0.0130 103 4.85 4.90 B
## PviralDNA 5.18 0.0211 103 5.14 5.23 C
## Mastitis 5.37 0.0176 103 5.34 5.41 D
## COREDNA 5.43 0.0180 103 5.40 5.47 D
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: df.error
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 7 estimates
## significance level used: alpha = 0.05
# Per-kit estimated marginal means as a plain data frame, so the fitted
# values can be exported to other plotting software.
data.frame(
  summary(
    emmeans(mod.Bovine.best, specs = ~ VariableKit, mode = "df.error")
  )
)
## VariableKit emmean SE df lower.CL upper.CL
## 1 COREDNA 5.432929 0.01799250 103 5.397245 5.468613
## 2 EZFood 4.695164 0.17517917 103 4.347738 5.042591
## 3 Mastitis 5.374554 0.01761333 103 5.339622 5.409486
## 4 Pfood 4.872965 0.01302185 103 4.847139 4.898791
## 5 PSoilP 4.059427 0.05522585 103 3.949899 4.168954
## 6 PviralDNA 5.184616 0.02109142 103 5.142786 5.226446
## 7 ZymoDNA 4.819755 0.01644564 103 4.787139 4.852371
# Per-kit marginal means with confidence limits and t-tests (infer = TRUE).
emmeans(mod.Bovine.best, specs = ~ VariableKit, mode = "df.error") %>%
  summary(infer = TRUE)
## VariableKit emmean SE df lower.CL upper.CL t.ratio p.value
## COREDNA 5.43 0.0180 103 5.40 5.47 301.955 <.0001
## EZFood 4.70 0.1752 103 4.35 5.04 26.802 <.0001
## Mastitis 5.37 0.0176 103 5.34 5.41 305.141 <.0001
## Pfood 4.87 0.0130 103 4.85 4.90 374.214 <.0001
## PSoilP 4.06 0.0552 103 3.95 4.17 73.506 <.0001
## PviralDNA 5.18 0.0211 103 5.14 5.23 245.816 <.0001
## ZymoDNA 4.82 0.0164 103 4.79 4.85 293.072 <.0001
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: df.error
## Confidence level used: 0.95
# Plot overlaying model estimates to raw data
# Raw observations: keep only the columns the plots below map to aesthetics.
# Selected by name rather than by position (previously c(25, 42, 4)) so the
# code keeps working if the column order of the input file changes.
mod_df1_Bovine.rawdata <- Bovine.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]
# Model estimates: per-kit marginal means and confidence limits.
mod_df2_Bovine.best.model <- emmeans(mod.Bovine.best, ~VariableKit, mode = "df.error") %>%
  summary() %>%
  data.frame()
# Jittered raw points with the model mean and confidence interval on top.
ggplot() +
  geom_jitter(
    data = mod_df1_Bovine.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk)
  ) +
  geom_point(
    data = mod_df2_Bovine.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_errorbar(
    data = mod_df2_Bovine.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  )
# Making the plot pretty
# Fixed colour per kit so that every figure uses the same palette.
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2326B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")
# Raw points (colour = kit, shape = spike set), then model CI, model mean and
# the compact-letter labels drawn on top.
# The colour scale is added exactly once; adding it twice only replaced the
# first scale and triggered a "Scale for 'colour' is already present" message.
ggplot() +
  geom_jitter(
    data = mod_df1_Bovine.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
  ) +
  geom_errorbar(
    data = mod_df2_Bovine.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Bovine.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bovine.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 0.2, nudge_x = -0.05, fontface = "bold"
  ) +
  #ylim(3.5, 6.5)+
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  theme_bw() +
  ggtitle("Bovine Copy Numbers - Inoculated Milk Only - Not assuming homoscedasticity ") +
  scale_color_manual(values = Colors) +
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line()) +
  theme(axis.text.x = element_text(angle = 25, vjust = 0.5)) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
Model not assuming homoscedasticity and including VariableKit + SpikeSet + qPCRefficiency was chosen.
qPCRefficiency is forced into all final models
Formula: mod.Bovine.all = nlme::gls(LogCopiespermLofMilk ~ VariableKit + SpikeSet + qPCRefficiency, data = Bovine.InnOnly,weights = varIdent(form = ~1 | VariableKit))
AIC(mod.Bovine.all) # best model
-134.968
Other Models for Reference:
mod.Bovine = nlme::gls(LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = Bovine.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod.Bovine)
-128.8078
mod3 = nlme::gls(LogCopiespermLofMilk ~ VariableKit, data = Bovine.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod3)
-131.4578
Previously chosen Linear Model that assumed homoscedasticity for reference:
m_Bovine.LogCopiespermLofMilk <- lm( LogCopiespermLofMilk ~ VariableKit + SpikeSet, data=Bovine.InnOnly )
AIC(m_Bovine.LogCopiespermLofMilk)
-13.54697
Manuscript Figures: Bovine
# Bovine: Milk Data and Controls
# SampleData was already filtered to exclude NP40InoculatedMilk when it was
# created, so this filter acts as a guard and leaves the row count unchanged.
Bovine.Inn.Ctrl <- Bovine %>% filter(VariableSampleType != "NP40InoculatedMilk")
dim(Bovine.Inn.Ctrl)
## [1] 240 42
# NOTE(review): "Pfood" is "#D2338B" here but "#D2326B" in the other palettes
# defined in this file - confirm which shade is intended.
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2338B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")
# Fix the facet order: samples first, then the controls.
Bovine.Inn.Ctrl$VariableSampleType <- factor(
  Bovine.Inn.Ctrl$VariableSampleType,
  levels = c("InoculatedMilk", "UninoculatedMilk", "NoTemplateControl", "MockCommunity")
)
# One facet per sample type; points are dodged by spike set within each kit.
# show.legend is a layer argument: ggplot() does not use it, so it is set only
# on geom_point() below.
ggplot(
  data = Bovine.Inn.Ctrl,
  aes(VariableKit, LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
) +
  scale_shape_discrete(solid = FALSE) +
  ylab("Bovine Log10 Copies / mL of Milk") +
  xlab("Kit") +
  geom_point(
    aes(colour = VariableKit),
    size = 2, stroke = .5,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1),
    show.legend = FALSE
  ) +
  facet_wrap(vars(VariableSampleType), nrow = 1) +
  ggtitle("Bovine DNA Copy Numbers - All Samples and Controls") +
  theme_bw() +
  ylim(0, 9) +
  scale_color_manual(values = Colors) +
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line()) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
## Warning: Removed 80 rows containing missing values (geom_point).
# Saves the most recently drawn plot (ggsave's default).
ggsave("Bovine-AllSamples.TIFF", width = 9, height = 3, units = "in", dpi = 600)
## Warning: Removed 80 rows containing missing values (geom_point).
# Inoculated milk only: one open symbol per observation, coloured by kit and
# dodged by spike set within each kit.
ggplot(
  data = Bovine.InnOnly,
  mapping = aes(x = VariableKit, y = LogCopiespermLofMilk, shape = factor(SpikeSet))
) +
  scale_shape_discrete(solid = FALSE) +
  geom_point(
    mapping = aes(colour = VariableKit),
    size = 2,
    stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1)
  ) +
  labs(
    x = "Kit",
    y = "Log10 Copies / mL of Milk",
    title = "Bovine DNA Copy Numbers - Inoculated Milk Only"
  ) +
  theme_bw() +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  )
# Plot overlaying model estimates to raw data
# Raw observations: the three columns the figure maps to aesthetics, selected
# by name (previously by position, c(25, 42, 4)) so a change in column order
# cannot silently pick the wrong variables.
mod_df1_Bovine.rawdata <- Bovine.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]
# Model estimates: taken from mod.Bovine.best, the same model that produced
# the compact-letter labels (mod.Bovine.best_cld_letters) drawn below.
# Previously the means/intervals came from mod3 while the letters came from
# mod.Bovine.best, so the figure mixed two different models.
mod_df2_Bovine.best.model <- emmeans(mod.Bovine.best, ~VariableKit, mode = "df.error") %>%
  summary() %>%
  data.frame()
# Jittered raw points (open symbols), model CI, model mean and letters on top.
ggplot() +
  geom_jitter(
    data = mod_df1_Bovine.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet),
    size = 2, stroke = 1, width = .2
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  geom_errorbar(
    data = mod_df2_Bovine.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Bovine.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bovine.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 0.7, nudge_x = -0.05, fontface = "bold"
  ) +
  ylim(3, 6.5) +
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Bovine DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line()) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
# Saves the most recently drawn plot (ggsave's default).
ggsave("Bovine-Model-Jitter.TIFF", width = 7.5, height = 3.5, units = "in", dpi = 600)
# Same overlay as the jittered figure, but raw points are dodged by spike set
# instead of randomly jittered. Layer order (raw points, error bars, model
# means, letters) determines what is drawn on top.
ggplot() +
  geom_point(
    data = mod_df1_Bovine.rawdata,
    mapping = aes(
      x = VariableKit,
      y = LogCopiespermLofMilk,
      color = VariableKit,
      shape = SpikeSet
    ),
    size = 2,
    stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = .5)
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  geom_errorbar(
    data = mod_df2_Bovine.best.model,
    mapping = aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Bovine.best.model,
    mapping = aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bovine.best_cld_letters),
    mapping = aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = .7,
    nudge_x = -0.05,
    fontface = "bold"
  ) +
  ylim(3, 6.5) +
  labs(
    x = "Kit",
    y = "Log10 Copies / mL of Milk",
    title = "Bovine DNA Copy Numbers - Inoculated Milk Only"
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  )
# Writes the plot drawn just above (ggsave defaults to the last plot).
ggsave(
  filename = "Bovine-Model.TIFF",
  width = 7.5,
  height = 3.5,
  units = "in",
  dpi = 600
)
Total Bacterial DNA
#Total Bacterial DNA
#Data File: CleanDNAprepData1.18.19
library(ggplot2)
library(dplyr)
library(emmeans)
library(multcompView)
# Subset of the sample data for the total bacterial DNA assay.
TotalBacterialDNA <- filter(SampleData, Assay == "Total Bacterial DNA")
dim(TotalBacterialDNA)
## [1] 240 42
# Mean, standard deviation and missing/total counts of the log copy number
# for every kit x sample-type combination.
TotalBacterialDNA.summary <- TotalBacterialDNA %>%
  group_by(VariableKit, VariableSampleType) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableKit' (override with `.groups` argument)
# Export the summary as a tab-separated text file.
write.table(
  TotalBacterialDNA.summary,
  file = "TotalBacterialDNA.summary.txt",
  sep = "\t"
)
#Plot Raw Means and Standard Deviations
# Fixed colour per kit so that every figure uses the same palette.
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2326B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")
# Box plot of log copy number per sample type, one box per kit.
# Only real aesthetics are mapped: the former `z = VariableKit` and
# `ylab = "Copy Numbers"` entries inside aes() were not aesthetics of this
# plot; the axis title is set by ylab() below.
# NOTE(review): SampleData was filtered to exclude "NP40InoculatedMilk", so
# that x-axis limit presumably shows as an empty slot - confirm it is wanted.
ggplot(
  data = TotalBacterialDNA,
  mapping = aes(x = VariableSampleType, y = LogCopiespermLofMilk, color = VariableKit)
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_boxplot(lwd = 1) +
  theme_bw() +
  ggtitle("TotalBacterialDNA Copy Numbers") +
  scale_color_manual(values = Colors) +
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line()) +
  theme(axis.text.x = element_text(angle = 25, vjust = 0.5)) +
  scale_x_discrete(limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl"))
## Warning: Removed 22 rows containing non-finite values (stat_boxplot).
# Jittered raw observations per sample type: colour = kit, shape = spike set.
ggplot(
  data = TotalBacterialDNA,
  mapping = aes(
    x = VariableSampleType,
    y = LogCopiespermLofMilk,
    color = VariableKit,
    shape = SpikeSet
  )
) +
  geom_jitter(width = 0.25) +
  labs(
    y = "Log10 Copies / mL of Milk",
    title = "TotalBacterialDNA Copy Numbers"
  ) +
  theme_bw() +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c(
      "UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk",
      "MockCommunity", "NoTemplateControl"
    )
  )
## Warning: Removed 22 rows containing missing values (geom_point).
# Same data with the roles swapped: colour = spike set, shape = kit.
# Seven kits exceed the default shape palette (see the warning that follows).
ggplot(
  data = TotalBacterialDNA,
  mapping = aes(
    x = VariableSampleType,
    y = LogCopiespermLofMilk,
    color = SpikeSet,
    shape = VariableKit
  )
) +
  geom_jitter(width = 0.35) +
  labs(
    y = "Log10 Copies / mL of Milk",
    title = "TotalBacterialDNA Copy Numbers"
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c(
      "UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk",
      "MockCommunity", "NoTemplateControl"
    )
  )
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 58 rows containing missing values (geom_point).
# Milk Data and Controls
# SampleData was already filtered to exclude NP40InoculatedMilk when it was
# created, so this filter acts as a guard and leaves the row count unchanged.
TotalBacterialDNA.Inn.Ctrl <- TotalBacterialDNA %>% filter(VariableSampleType != "NP40InoculatedMilk")
dim(TotalBacterialDNA.Inn.Ctrl)
## [1] 240 42
# NOTE(review): "Pfood" is "#D2338B" here but "#D2326B" in the other palettes
# defined in this file - confirm which shade is intended.
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2338B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")
# Fix the facet order: samples first, then the controls.
TotalBacterialDNA.Inn.Ctrl$VariableSampleType <- factor(
  TotalBacterialDNA.Inn.Ctrl$VariableSampleType,
  levels = c("InoculatedMilk", "UninoculatedMilk", "NoTemplateControl", "MockCommunity")
)
# One facet per sample type, jittered open symbols coloured by kit.
# show.legend is a layer argument: passed to ggplot() it had no effect, so it
# now sits on geom_jitter(), matching the equivalent Bovine figure.
ggplot(
  data = TotalBacterialDNA.Inn.Ctrl,
  aes(VariableKit, LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
) +
  scale_shape_discrete(solid = FALSE) +
  ylab("Bacteria Log10 Copies / mL of Milk") +
  geom_jitter(width = 0.25, size = 2.5, stroke = 0.7, show.legend = FALSE) +
  facet_wrap(vars(VariableSampleType), nrow = 1) +
  ggtitle("Bacteria DNA Copy Numbers - All Samples and Controls") +
  theme_bw() +
  ylim(0, 9) +
  scale_color_manual(values = Colors) +
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line()) +
  theme(axis.text.x = element_text(angle = 25, vjust = 0.5)) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
## Warning: Removed 22 rows containing missing values (geom_point).
# Inoculated milk only, keeping rows whose log copy number exceeds 0.001
# (rows with a missing value are dropped by filter() as well).
TotalBacterialDNA.InnOnly <- filter(
  TotalBacterialDNA,
  VariableSampleType == "InoculatedMilk",
  LogCopiespermLofMilk > 0.001
)
# Summary statistics per sample type x spike set x kit, printed as a data frame.
TotalBacterialDNA.InnOnly %>%
  group_by(VariableSampleType, VariableSpikeSet, VariableKit) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableSampleType', 'VariableSpikeSet' (override with `.groups` argument)
## VariableSampleType VariableSpikeSet VariableKit mean_LogCopiespermLofMilk
## 1 InoculatedMilk First COREDNA 6.565413
## 2 InoculatedMilk First EZFood 4.277616
## 3 InoculatedMilk First Mastitis 6.805970
## 4 InoculatedMilk First Pfood 6.076956
## 5 InoculatedMilk First PSoilP 5.698609
## 6 InoculatedMilk First PviralDNA 6.192312
## 7 InoculatedMilk First ZymoDNA 6.346418
## 8 InoculatedMilk Second COREDNA 7.490835
## 9 InoculatedMilk Second Mastitis 7.668798
## 10 InoculatedMilk Second Pfood 6.631503
## 11 InoculatedMilk Second PSoilP 6.439095
## 12 InoculatedMilk Second PviralDNA 7.121062
## 13 InoculatedMilk Second ZymoDNA 6.697743
## 14 InoculatedMilk Third COREDNA 6.881582
## 15 InoculatedMilk Third EZFood 4.342899
## 16 InoculatedMilk Third Mastitis 8.053032
## 17 InoculatedMilk Third Pfood 7.418800
## 18 InoculatedMilk Third PSoilP 6.795545
## 19 InoculatedMilk Third PviralDNA 7.556803
## 20 InoculatedMilk Third ZymoDNA 7.282966
## st_dev n_missing n_total
## 1 0.05259444 0 6
## 2 1.45768760 0 6
## 3 0.01589447 0 6
## 4 0.02287900 0 6
## 5 0.11744216 0 6
## 6 0.03599713 0 6
## 7 0.03366165 0 6
## 8 0.02140125 0 6
## 9 0.02543933 0 6
## 10 0.04452933 0 6
## 11 0.15846125 0 6
## 12 0.07745249 0 6
## 13 0.04461625 0 6
## 14 1.74763906 0 5
## 15 2.30314702 0 4
## 16 0.06686248 0 6
## 17 0.03041420 0 6
## 18 0.08899794 0 6
## 19 0.10432742 0 6
## 20 0.12384595 0 6
# Three linear models are compared below: SpikeSet only, qPCRefficiency only,
# and both as covariates. The best-fitting one is used as the final model.
# Model 1: kit effect adjusted for spike set.
m_TotalBacterialDNA.LogCopiespermLofMilk1 <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = TotalBacterialDNA.InnOnly
)
m_TotalBacterialDNA.LogCopiespermLofMilk1 %>%
  summary()
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = TotalBacterialDNA.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2707 -0.1573 0.0449 0.1796 1.8855
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.47037 0.17757 36.439 < 2e-16 ***
## VariableKitEZFood -2.54574 0.26294 -9.682 2.47e-16 ***
## VariableKitMastitis 0.50018 0.21946 2.279 0.02463 *
## VariableKitPfood -0.30000 0.21946 -1.367 0.17448
## VariableKitPSoilP -0.69800 0.21946 -3.180 0.00192 **
## VariableKitPviralDNA -0.05236 0.21946 -0.239 0.81189
## VariableKitZymoDNA -0.23338 0.21946 -1.063 0.28998
## SpikeSetSecond 0.66840 0.15065 4.437 2.21e-05 ***
## SpikeSetThird 0.94775 0.14458 6.555 1.96e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6488 on 108 degrees of freedom
## Multiple R-squared: 0.6727, Adjusted R-squared: 0.6484
## F-statistic: 27.74 on 8 and 108 DF, p-value: < 2.2e-16
# Model 2: kit effect adjusted for qPCR efficiency instead of spike set.
m_TotalBacterialDNA.LogCopiespermLofMilk2 <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency,
  data = TotalBacterialDNA.InnOnly
)
m_TotalBacterialDNA.LogCopiespermLofMilk2 %>%
  summary()
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency,
## data = TotalBacterialDNA.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7187 -0.3802 -0.1493 0.5690 2.3270
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.45105 0.59965 9.090 5.07e-15 ***
## VariableKitEZFood -2.45939 0.30825 -7.979 1.60e-12 ***
## VariableKitMastitis 0.59092 0.25322 2.334 0.02145 *
## VariableKitPfood -0.20926 0.25322 -0.826 0.41038
## VariableKitPSoilP -0.60726 0.25322 -2.398 0.01817 *
## VariableKitPviralDNA -0.02169 0.25200 -0.086 0.93156
## VariableKitZymoDNA -0.14263 0.25322 -0.563 0.57439
## qPCRefficiency 1.77000 0.65975 2.683 0.00844 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7451 on 109 degrees of freedom
## Multiple R-squared: 0.5642, Adjusted R-squared: 0.5362
## F-statistic: 20.16 on 7 and 109 DF, p-value: < 2.2e-16
# Model 3: kit effect adjusted for both qPCR efficiency and spike set.
m_TotalBacterialDNA.LogCopiespermLofMilk3 <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = TotalBacterialDNA.InnOnly
)
m_TotalBacterialDNA.LogCopiespermLofMilk3 %>%
  summary()
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency +
## SpikeSet, data = TotalBacterialDNA.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3078 -0.1452 0.0405 0.2129 1.8860
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.53077 2.07380 3.631 0.000434 ***
## VariableKitEZFood -2.60744 0.28994 -8.993 9.65e-15 ***
## VariableKitMastitis 0.45066 0.24043 1.874 0.063607 .
## VariableKitPfood -0.34952 0.24043 -1.454 0.148947
## VariableKitPSoilP -0.74753 0.24043 -3.109 0.002405 **
## VariableKitPviralDNA -0.05442 0.22025 -0.247 0.805317
## VariableKitZymoDNA -0.28290 0.24043 -1.177 0.241951
## qPCRefficiency -1.39836 2.72463 -0.513 0.608849
## SpikeSetSecond 1.01868 0.69906 1.457 0.147983
## SpikeSetThird 1.04245 0.23473 4.441 2.19e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.651 on 107 degrees of freedom
## Multiple R-squared: 0.6735, Adjusted R-squared: 0.646
## F-statistic: 24.52 on 9 and 107 DF, p-value: < 2.2e-16
# Sequential F-tests across the three lm fits, in the order given.
# NOTE(review): models 1 and 2 are not nested (one contains SpikeSet, the other
# qPCRefficiency), so the model 1 -> 2 row (negative Df) is not a nested-model
# test; only comparisons against model 3, which contains both terms, are nested.
anova(m_TotalBacterialDNA.LogCopiespermLofMilk1, m_TotalBacterialDNA.LogCopiespermLofMilk2, m_TotalBacterialDNA.LogCopiespermLofMilk3)
## Analysis of Variance Table
##
## Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
## Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency
## Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 108 45.455
## 2 109 60.514 -1 -15.059 35.536 3.263e-08 ***
## 3 107 45.343 2 15.171 17.900 1.969e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# AIC comparison of the three lm fits (lower is better).
# The qPCRefficiency-only model (2) does not fit better than the SpikeSet model (1).
AIC(m_TotalBacterialDNA.LogCopiespermLofMilk1)
## [1] 241.4133
AIC(m_TotalBacterialDNA.LogCopiespermLofMilk2)
## [1] 272.8932
AIC(m_TotalBacterialDNA.LogCopiespermLofMilk3)
## [1] 243.1257
# Final model chosen:
# Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
# No difference in fit between models 1 and 3, so the simplest model was chosen.
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'lme4'
## The following object is masked from 'package:nlme':
##
## lmList
library(lmerTest)
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
# Mixed model alternative: kit as fixed effect, spike set as random intercept,
# fitted by REML.
model1 <- lmer(
  formula = LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet),
  data = TotalBacterialDNA.InnOnly,
  REML = TRUE
)
model1 %>%
  summary()
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
## Data: TotalBacterialDNA.InnOnly
##
## REML criterion at convergence: 242.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.0127 -0.2393 0.0585 0.3041 2.9078
##
## Random effects:
## Groups Name Variance Std.Dev.
## SpikeSet (Intercept) 0.2273 0.4768
## Residual 0.4209 0.6487
## Number of obs: 117, groups: SpikeSet, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 7.00804 0.31709 3.21399 22.101 0.000127 ***
## VariableKitEZFood -2.55195 0.26272 108.34313 -9.714 < 2e-16 ***
## VariableKitMastitis 0.50122 0.21946 108.01544 2.284 0.024331 *
## VariableKitPfood -0.29896 0.21946 108.01544 -1.362 0.175950
## VariableKitPSoilP -0.69696 0.21946 108.01544 -3.176 0.001948 **
## VariableKitPviralDNA -0.05132 0.21946 108.01544 -0.234 0.815553
## VariableKitZymoDNA -0.23233 0.21946 108.01544 -1.059 0.292108
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VarblKtEZFd -0.298
## VrblKtMstts -0.356 0.431
## VariblKtPfd -0.356 0.431 0.515
## VarblKtPSlP -0.356 0.431 0.515 0.515
## VrblKtPvDNA -0.356 0.431 0.515 0.515 0.515
## VrblKtZyDNA -0.356 0.431 0.515 0.515 0.515 0.515
# Same mixed model with qPCR efficiency added as a fixed covariate,
# fitted by REML.
model2 <- lmer(
  formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet),
  data = TotalBacterialDNA.InnOnly,
  REML = TRUE
)
model2 %>%
  summary()
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
## Data: TotalBacterialDNA.InnOnly
##
## REML criterion at convergence: 239.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.0040 -0.2417 0.0572 0.2971 2.8908
##
## Random effects:
## Groups Name Variance Std.Dev.
## SpikeSet (Intercept) 0.2930 0.5413
## Residual 0.4231 0.6504
## Number of obs: 117, groups: SpikeSet, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.97517 1.78073 6.85156 3.917 0.00602 **
## VariableKitEZFood -2.54882 0.28021 104.66468 -9.096 6.67e-15 ***
## VariableKitMastitis 0.50236 0.23143 102.87457 2.171 0.03226 *
## VariableKitPfood -0.29782 0.23143 102.87457 -1.287 0.20103
## VariableKitPSoilP -0.69582 0.23143 102.87457 -3.007 0.00332 **
## VariableKitPviralDNA -0.05148 0.22006 107.41131 -0.234 0.81548
## VariableKitZymoDNA -0.23120 0.23143 102.87457 -0.999 0.32015
## qPCRefficiency 0.03828 2.01965 7.28234 0.019 0.98539
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA
## VarblKtEZFd -0.384
## VrblKtMstts -0.364 0.490
## VariblKtPfd -0.364 0.490 0.561
## VarblKtPSlP -0.364 0.490 0.561 0.561
## VrblKtPvDNA -0.078 0.410 0.494 0.494 0.494
## VrblKtZyDNA -0.364 0.490 0.561 0.561 0.561 0.494
## qPCReffcncy -0.980 0.341 0.310 0.310 0.310 0.014 0.310
# Likelihood-ratio comparison of the two mixed models, which differ only in
# the qPCRefficiency fixed effect (the models are refitted with ML for this
# test, as the message below reports).
anova(model1, model2)
## refitting model(s) with ML (instead of REML)
## Data: TotalBacterialDNA.InnOnly
## Models:
## model1: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
## model2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model1 9 250.67 275.53 -116.33 232.67
## model2 10 252.56 280.18 -116.28 232.56 0.1137 1 0.736
# AIC of the two mixed models as fitted (REML = TRUE).
# NOTE(review): the models differ in their fixed effects, so REML-based AICs
# are generally not comparable between them; the ML-refitted AICs printed by
# anova(model1, model2) are presumably the ones to rely on - confirm.
AIC (model1)
## [1] 260.691
AIC (model2)
## [1] 259.5033
# Final model for total bacterial DNA: same specification as model 1
# (kit + spike set), stored under the name used by the diagnostics below.
m_TotalBacterialDNA.LogCopiespermLofMilk <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = TotalBacterialDNA.InnOnly
)
m_TotalBacterialDNA.LogCopiespermLofMilk %>%
  summary()
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = TotalBacterialDNA.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2707 -0.1573 0.0449 0.1796 1.8855
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.47037 0.17757 36.439 < 2e-16 ***
## VariableKitEZFood -2.54574 0.26294 -9.682 2.47e-16 ***
## VariableKitMastitis 0.50018 0.21946 2.279 0.02463 *
## VariableKitPfood -0.30000 0.21946 -1.367 0.17448
## VariableKitPSoilP -0.69800 0.21946 -3.180 0.00192 **
## VariableKitPviralDNA -0.05236 0.21946 -0.239 0.81189
## VariableKitZymoDNA -0.23338 0.21946 -1.063 0.28998
## SpikeSetSecond 0.66840 0.15065 4.437 2.21e-05 ***
## SpikeSetThird 0.94775 0.14458 6.555 1.96e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6488 on 108 degrees of freedom
## Multiple R-squared: 0.6727, Adjusted R-squared: 0.6484
## F-statistic: 27.74 on 8 and 108 DF, p-value: < 2.2e-16
# Residuals against predicted values, first with base graphics.
plot(
  x = predict(m_TotalBacterialDNA.LogCopiespermLofMilk),
  y = resid(m_TotalBacterialDNA.LogCopiespermLofMilk)
)
# Same diagnostic with ggplot2, coloured by kit; horizontal reference lines
# are drawn at residuals of +1 and -1.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
ggplot(
  m_TotalBacterialDNA.LogCopiespermLofMilk,
  aes(
    x = predict(m_TotalBacterialDNA.LogCopiespermLofMilk),
    y = resid(m_TotalBacterialDNA.LogCopiespermLofMilk),
    color = VariableKit
  )
) +
  geom_point() +
  theme_bw() +
  ggtitle("Total Bacterial DNA Innoculated Only - Model Fit - Residuals vs Predicted") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  geom_hline(yintercept = 1) +
  geom_hline(yintercept = -1)
# Normal Q-Q plot of the final model's residuals, with reference line.
invisible(local({
  model_residuals <- resid(m_TotalBacterialDNA.LogCopiespermLofMilk)
  qqnorm(model_residuals)
  qqline(model_residuals)
}))
# Coefficient table of the final model.
m_TotalBacterialDNA.LogCopiespermLofMilk %>%
  summary()
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = TotalBacterialDNA.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2707 -0.1573 0.0449 0.1796 1.8855
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.47037 0.17757 36.439 < 2e-16 ***
## VariableKitEZFood -2.54574 0.26294 -9.682 2.47e-16 ***
## VariableKitMastitis 0.50018 0.21946 2.279 0.02463 *
## VariableKitPfood -0.30000 0.21946 -1.367 0.17448
## VariableKitPSoilP -0.69800 0.21946 -3.180 0.00192 **
## VariableKitPviralDNA -0.05236 0.21946 -0.239 0.81189
## VariableKitZymoDNA -0.23338 0.21946 -1.063 0.28998
## SpikeSetSecond 0.66840 0.15065 4.437 2.21e-05 ***
## SpikeSetThird 0.94775 0.14458 6.555 1.96e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6488 on 108 degrees of freedom
## Multiple R-squared: 0.6727, Adjusted R-squared: 0.6484
## F-statistic: 27.74 on 8 and 108 DF, p-value: < 2.2e-16
# Many large residuals were identified, most belong to EZFood
# Store the lm residuals next to the data, then count, per kit, how many
# observations have an absolute residual above 1.
TotalBacterialDNA.InnOnly$resid <- resid(
  m_TotalBacterialDNA.LogCopiespermLofMilk
)
TotalBacterialDNA.InnOnly %>%
  filter(abs(resid) > 1) %>%
  select(VariableKit, resid) %>%
  group_by(VariableKit) %>%
  summarize(n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
## VariableKit n
## <chr> <int>
## 1 COREDNA 2
## 2 EZFood 8
# Check Tukey-adjusted pairwise comparison of kit estimates
# The result holds both the marginal means ($emmeans) and the contrasts.
m_TotalBacterialDNA.LogCopiespermLofMilk_emmeans <- emmeans(
  m_TotalBacterialDNA.LogCopiespermLofMilk,
  pairwise ~ VariableKit
)
# Use compact letter display for convenience
# (kits sharing a letter are not significantly different at alpha = 0.05).
m_TotalBacterialDNA.LogCopiespermLofMilk_cld <- CLD(
  m_TotalBacterialDNA.LogCopiespermLofMilk_emmeans[["emmeans"]],
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
m_TotalBacterialDNA.LogCopiespermLofMilk_cld
## VariableKit emmean SE df lower.CL upper.CL .group
## EZFood 4.46 0.210 108 4.05 4.88 A
## PSoilP 6.31 0.153 108 6.01 6.61 B
## Pfood 6.71 0.153 108 6.41 7.01 BC
## ZymoDNA 6.78 0.153 108 6.47 7.08 BC
## PviralDNA 6.96 0.153 108 6.65 7.26 BCD
## COREDNA 7.01 0.157 108 6.70 7.32 CD
## Mastitis 7.51 0.153 108 7.21 7.81 D
##
## Results are averaged over the levels of: SpikeSet
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 7 estimates
## significance level used: alpha = 0.05
# Get fitted values from model to plot with other software
# Per-kit estimated marginal means printed as a plain data frame.
data.frame(
  summary(
    emmeans(m_TotalBacterialDNA.LogCopiespermLofMilk, ~ VariableKit)
  )
)
## VariableKit emmean SE df lower.CL upper.CL
## 1 COREDNA 7.009084 0.1574248 108 6.697041 7.321127
## 2 EZFood 4.463344 0.2103444 108 4.046405 4.880283
## 3 Mastitis 7.509267 0.1529121 108 7.206169 7.812365
## 4 Pfood 6.709086 0.1529121 108 6.405988 7.012185
## 5 PSoilP 6.311083 0.1529121 108 6.007985 6.614182
## 6 PviralDNA 6.956726 0.1529121 108 6.653627 7.259824
## 7 ZymoDNA 6.775709 0.1529121 108 6.472611 7.078807
# Plot fitted values from model
# Per-kit marginal means with confidence limits and the compact-letter groups.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
emmeans(m_TotalBacterialDNA.LogCopiespermLofMilk, ~ VariableKit) %>%
  summary() %>%
  data.frame() %>%
  ggplot(aes(x = VariableKit, y = emmean, color = VariableKit)) +
  geom_point() +
  labs(y = "Estimated Marginal Means") +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.5) +
  # Letters come from the CLD table; y and colour are inherited from the plot.
  geom_text(
    data = data.frame(m_TotalBacterialDNA.LogCopiespermLofMilk_cld),
    aes(x = VariableKit, label = `.group`),
    hjust = -.1
  ) +
  ylim(3.5, 8.5) +
  theme_bw() +
  ggtitle("Total 16S Bacterial DNA Copy Numbers - Inoculated Milk Only") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA",
      "ZymoDNA"
    )
  )
# from https://cran.r-project.org/web/packages/emmeans/vignettes/FAQs.html#contents
library(nlme)
# lm chosen: Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
# Same fixed effects as that lm, but fitted by generalized least squares with
# a separate residual standard deviation for each kit (varIdent).
mod.Bacteria <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = TotalBacterialDNA.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod.Bacteria)
## Generalized least squares fit by REML
## Model: LogCopiespermLofMilk ~ VariableKit + SpikeSet
## Data: TotalBacterialDNA.InnOnly
## AIC BIC logLik
## 15.55897 58.47307 8.220514
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | VariableKit
## Parameter estimates:
## COREDNA EZFood Mastitis Pfood PSoilP PviralDNA ZymoDNA
## 1.00000000 1.84385607 0.04438187 0.16888223 0.13620445 0.09116882 0.22206408
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 6.322454 0.2400657 26.33634 0.0000
## VariableKitEZFood -2.518615 0.6243592 -4.03392 0.0001
## VariableKitMastitis 0.491624 0.2400002 2.04843 0.0429
## VariableKitPfood -0.308557 0.2429852 -1.26986 0.2069
## VariableKitPSoilP -0.706560 0.2418687 -2.92125 0.0042
## VariableKitPviralDNA -0.060918 0.2407165 -0.25307 0.8007
## VariableKitZymoDNA -0.241934 0.2452972 -0.98629 0.3262
## SpikeSetSecond 0.835842 0.0210066 39.78950 0.0000
## SpikeSetThird 1.249727 0.0210053 59.49570 0.0000
##
## Correlation:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA SpkStS
## VariableKitEZFood -0.384
## VariableKitMastitis -0.998 0.384
## VariableKitPfood -0.986 0.379 0.986
## VariableKitPSoilP -0.990 0.381 0.990 0.978
## VariableKitPviralDNA -0.995 0.383 0.995 0.983 0.987
## VariableKitZymoDNA -0.976 0.375 0.977 0.965 0.969 0.974
## SpikeSetSecond -0.044 0.010 0.000 0.000 0.000 0.000 0.000
## SpikeSetThird -0.041 0.002 -0.003 -0.003 -0.003 -0.003 -0.003 0.500
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -3.4641570 -0.7559524 0.2070587 0.7225366 2.3754575
##
## Residual standard error: 0.9886226
## Degrees of freedom: 117 total; 108 residual
# AIC of the earlier model `...Milk1` next to the AIC of the gls fit.
# NOTE(review): mod.Bacteria was fit by REML (see its summary output). If
# `...Milk1` is the ordinary lm named in the "lm chosen" comment, its AIC is
# based on the full likelihood, so the two numbers are presumably not on the
# same scale - confirm by refitting the gls with method = "ML".
AIC(m_TotalBacterialDNA.LogCopiespermLofMilk1)
## [1] 241.4133
AIC(mod.Bacteria)
## [1] 15.55897
# Testing simpler model
# Kit is the only fixed effect; each kit still gets its own residual
# standard deviation through varIdent.
mod3.Bacteria <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit,
  data = TotalBacterialDNA.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod3.Bacteria)
## Generalized least squares fit by REML
## Model: LogCopiespermLofMilk ~ VariableKit
## Data: TotalBacterialDNA.InnOnly
## AIC BIC logLik
## 254.6237 292.4305 -113.3119
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | VariableKit
## Parameter estimates:
## COREDNA EZFood Mastitis Pfood PSoilP PviralDNA ZymoDNA
## 1.0000000 1.7810698 0.5579628 0.5883981 0.5024870 0.6119406 0.4192329
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 6.985023 0.2338778 29.866127 0.0000
## VariableKitEZFood -2.681294 0.5913342 -4.534312 0.0000
## VariableKitMastitis 0.524243 0.2660484 1.970482 0.0513
## VariableKitPfood -0.275937 0.2694144 -1.024210 0.3080
## VariableKitPSoilP -0.673940 0.2602741 -2.589347 0.0109
## VariableKitPviralDNA -0.028298 0.2721103 -0.103993 0.9174
## VariableKitZymoDNA -0.209314 0.2525438 -0.828824 0.4090
##
## Correlation:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VariableKitEZFood -0.396
## VariableKitMastitis -0.879 0.348
## VariableKitPfood -0.868 0.343 0.763
## VariableKitPSoilP -0.899 0.355 0.790 0.780
## VariableKitPviralDNA -0.859 0.340 0.756 0.746 0.772
## VariableKitZymoDNA -0.926 0.366 0.814 0.804 0.832 0.796
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.9426303 -1.0190259 0.1486182 0.8902093 1.5164719
##
## Residual standard error: 0.9643028
## Degrees of freedom: 117 total; 110 residual
# AIC of the earlier model `...Milk1` and of the two gls fits, side by side.
# NOTE(review): mod.Bacteria and mod3.Bacteria are REML fits with different
# fixed effects; REML criteria are generally only comparable between models
# sharing the same fixed effects - TODO confirm the ranking with
# method = "ML" refits.
AIC(m_TotalBacterialDNA.LogCopiespermLofMilk1)
## [1] 241.4133
AIC(mod.Bacteria) #mod.Bacteria is best model (including SpikeSet)
## [1] 15.55897
AIC(mod3.Bacteria)
## [1] 254.6237
# Model forcing qPCRefficiency
# Fixed effects: kit, spike set and qPCR efficiency, with a separate residual
# standard deviation for each kit.
mod.Bacteria.all <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit + SpikeSet + qPCRefficiency,
  data = TotalBacterialDNA.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod.Bacteria.all)
## Generalized least squares fit by REML
## Model: LogCopiespermLofMilk ~ VariableKit + SpikeSet + qPCRefficiency
## Data: TotalBacterialDNA.InnOnly
## AIC BIC logLik
## 10.10053 55.53862 11.94973
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | VariableKit
## Parameter estimates:
## COREDNA EZFood Mastitis Pfood PSoilP PviralDNA ZymoDNA
## 1.00000000 1.81740972 0.04922275 0.16045698 0.12966370 0.07614551 0.20596264
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 7.287423 0.4049640 17.99524 0.0000
## VariableKitEZFood -2.573698 0.6244850 -4.12131 0.0001
## VariableKitMastitis 0.445932 0.2434423 1.83178 0.0698
## VariableKitPfood -0.354248 0.2460925 -1.43949 0.1529
## VariableKitPSoilP -0.752251 0.2450807 -3.06940 0.0027
## VariableKitPviralDNA -0.063134 0.2433529 -0.25943 0.7958
## VariableKitZymoDNA -0.287625 0.2479698 -1.15992 0.2487
## SpikeSetSecond 1.164434 0.1126872 10.33333 0.0000
## SpikeSetThird 1.348696 0.0378059 35.67421 0.0000
## qPCRefficiency -1.280854 0.4293443 -2.98328 0.0035
##
## Correlation:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA SpkStS
## VariableKitEZFood -0.256
## VariableKitMastitis -0.647 0.389
## VariableKitPfood -0.640 0.385 0.987
## VariableKitPSoilP -0.643 0.387 0.991 0.980
## VariableKitPviralDNA -0.600 0.388 0.994 0.984 0.988
## VariableKitZymoDNA -0.636 0.382 0.980 0.969 0.973 0.976
## SpikeSetSecond 0.780 -0.026 -0.061 -0.061 -0.061 -0.003 -0.060
## SpikeSetThird 0.640 -0.022 -0.053 -0.052 -0.052 -0.004 -0.052 0.859
## qPCRefficiency -0.800 0.029 0.063 0.062 0.062 0.003 0.061 -0.981
## SpkStT
## VariableKitEZFood
## VariableKitMastitis
## VariableKitPfood
## VariableKitPSoilP
## VariableKitPviralDNA
## VariableKitZymoDNA
## SpikeSetSecond
## SpikeSetThird
## qPCRefficiency -0.819
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -3.4624783 -0.7305480 0.1434528 0.8160697 2.3154272
##
## Residual standard error: 1.000626
## Degrees of freedom: 117 total; 107 residual
# AIC of the model that also includes qPCRefficiency.
AIC(mod.Bacteria.all)
## [1] 10.10053
#mod.Bacteria.all not assuming homoscedasticity and including SpikeSet and qPCRefficiency is a better fit
# Alias the selected model; the emmeans, CLD and figure code use this name.
mod.Bacteria.best <- mod.Bacteria.all
# Same object, so the AIC is identical to the value printed above.
AIC(mod.Bacteria.best)
## [1] 10.10053
library(emmeans)
# Check Tukey-adjusted pairwise comparison of kit estimates
# mode = "df.error" selects the degrees-of-freedom method for the gls fit.
mod.Bacteria.best_emmeans <- emmeans(
  mod.Bacteria.best,
  pairwise ~ VariableKit,
  mode = "df.error"
)
# Use compact letter display for convenience
# Detailed version (sorted, with the underlying comparisons).
mod.Bacteria.best_cld <- CLD(
  mod.Bacteria.best_emmeans[["emmeans"]],
  sort = TRUE,
  details = TRUE,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
# Letters-only table, used to label the figures.
mod.Bacteria.best_cld_letters <- CLD(
  mod.Bacteria.best_emmeans[["emmeans"]],
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Bacteria.best_cld_letters
## VariableKit emmean SE df lower.CL upper.CL .group
## EZFood 4.486 0.57513 100 3.344 5.627 A
## PSoilP 6.307 0.03061 100 6.246 6.368 B
## Pfood 6.705 0.03787 100 6.630 6.780 C
## ZymoDNA 6.772 0.04860 100 6.675 6.868 C
## PviralDNA 6.996 0.02229 100 6.952 7.040 D
## COREDNA 7.059 0.24308 100 6.577 7.542 CDE
## Mastitis 7.505 0.01169 100 7.482 7.528 E
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: df.error
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 7 estimates
## significance level used: alpha = 0.05
# Get fitted values from model to plot with other software
# Per-kit estimated marginal means of the selected gls model as a data frame.
data.frame(
  summary(
    emmeans(mod.Bacteria.best, ~ VariableKit, mode = "df.error")
  )
)
## VariableKit emmean SE df lower.CL upper.CL
## 1 COREDNA 7.059235 0.24308315 100 6.576965 7.541505
## 2 EZFood 4.485538 0.57512880 100 3.344498 5.626577
## 3 Mastitis 7.505168 0.01169020 100 7.481975 7.528361
## 4 Pfood 6.704987 0.03786867 100 6.629857 6.780118
## 5 PSoilP 6.306984 0.03061200 100 6.246251 6.367718
## 6 PviralDNA 6.996101 0.02228736 100 6.951883 7.040318
## 7 ZymoDNA 6.771610 0.04859566 100 6.675197 6.868022
# Get summary
# infer = TRUE adds t ratios and p values to the confidence limits.
emmeans(mod.Bacteria.best, ~ VariableKit, mode = "df.error") %>%
  summary(infer = TRUE)
## VariableKit emmean SE df lower.CL upper.CL t.ratio p.value
## COREDNA 7.059 0.24308 100 6.577 7.542 29.040 <.0001
## EZFood 4.486 0.57513 100 3.344 5.627 7.799 <.0001
## Mastitis 7.505 0.01169 100 7.482 7.528 642.005 <.0001
## Pfood 6.705 0.03787 100 6.630 6.780 177.059 <.0001
## PSoilP 6.307 0.03061 100 6.246 6.368 206.030 <.0001
## PviralDNA 6.996 0.02229 100 6.952 7.040 313.904 <.0001
## ZymoDNA 6.772 0.04860 100 6.675 6.868 139.346 <.0001
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: df.error
## Confidence level used: 0.95
# Plot overlaying model estimates to raw data
# Raw observations: the columns are selected by name instead of by position
# (previously c(25, 42, 4)) so the subset does not silently change when
# columns are added to or reordered in TotalBacterialDNA.InnOnly.
# NOTE(review): assumes positions 25, 42 and 4 were exactly these three
# columns - the plots built from this object map only these names; confirm.
mod_df1_Bacteria.rawdata <- TotalBacterialDNA.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]
# Model estimates: per-kit marginal means and confidence limits.
mod_df2_Bacteria.best.model <- emmeans(
  mod.Bacteria.best, ~ VariableKit,
  mode = "df.error"
) %>%
  summary() %>%
  data.frame()
# Quick look: jittered raw values with the estimates and limits on top.
ggplot() +
  geom_jitter(
    data = mod_df1_Bacteria.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk)
  ) +
  geom_point(
    data = mod_df2_Bacteria.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_errorbar(
    data = mod_df2_Bacteria.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  )
# Making the plot pretty
# Raw inoculated-milk values (coloured by kit, shaped by spike set) overlaid
# with the gls marginal means, their confidence limits and the CLD letters.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
ggplot() +
  geom_jitter(
    data = mod_df1_Bacteria.rawdata,
    aes(
      x = VariableKit, y = LogCopiespermLofMilk,
      color = VariableKit, shape = SpikeSet
    )
  ) +
  geom_errorbar(
    data = mod_df2_Bacteria.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Bacteria.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bacteria.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 0.2, nudge_x = -0.05, fontface = "bold"
  ) +
  #ylim(3.5, 6.5)+
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  theme_bw() +
  ggtitle("Bacteria Copy Numbers - Inoculated Milk Only - Not assuming homoscedasticity ") +
  # The colour scale is added exactly once; a second identical
  # scale_color_manual() used to trigger ggplot2's "Scale for 'colour' is
  # already present" message.
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA",
      "ZymoDNA"
    )
  )
Model not assuming homoscedasticity and including VariableKit + SpikeSet + qPCRefficiency was chosen.
qPCRefficiency is forced into all final models
Formula: mod.Bacteria.all = nlme::gls(LogCopiespermLofMilk ~ VariableKit + SpikeSet + qPCRefficiency, data = TotalBacterialDNA.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod.Bacteria.all) # best model
10.10053
Other Models for Reference:
mod.Bacteria = nlme::gls(LogCopiespermLofMilk ~ VariableKit + SpikeSet, data=TotalBacterialDNA.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod.Bacteria) 15.55897
mod3.Bacteria = nlme::gls(LogCopiespermLofMilk ~ VariableKit, data=TotalBacterialDNA.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod3.Bacteria)
254.6237
Previously chosen Linear Model that assumed homoscedasticity for reference:
m_TotalBacterialDNA.LogCopiespermLofMilk <- lm( LogCopiespermLofMilk ~ VariableKit + SpikeSet, data=TotalBacterialDNA.InnOnly )
AIC(m_TotalBacterialDNA.LogCopiespermLofMilk1)
241.4133
Manuscript Figures: Total Bacteria
# Total Bacteria: Milk Data and Controls
# Keep every sample type except the NP40-treated inoculated milk.
Bacteria.Inn.Ctrl <- TotalBacterialDNA %>%
  filter(VariableSampleType != "NP40InoculatedMilk")
dim(Bacteria.Inn.Ctrl)
## [1] 240 42
# NOTE(review): "Pfood" is "#D2338B" here but "#D2326B" in the other palette
# definitions of this script - confirm which value is intended.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2338B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
# Fix the left-to-right order of the facets.
Bacteria.Inn.Ctrl$VariableSampleType <- factor(
  Bacteria.Inn.Ctrl$VariableSampleType,
  levels = c(
    "InoculatedMilk", "UninoculatedMilk", "NoTemplateControl", "MockCommunity"
  )
)
# One panel per sample type; points are dodged (not jittered) within each kit.
ggplot(
  data = Bacteria.Inn.Ctrl,
  aes(VariableKit, LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
) +
  scale_shape_discrete(solid = FALSE) +
  ylab("Bacteria Log10 Copies / mL of Milk") +
  xlab("Kit") +
  geom_point(
    aes(colour = VariableKit),
    size = 2, stroke = .5,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1),
    show.legend = FALSE
  ) +
  facet_wrap(vars(VariableSampleType), nrow = 1) +
  ggtitle("Total Bacteria DNA Copy Numbers - All Samples and Controls") +
  theme_bw() +
  ylim(0, 9) +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA",
      "ZymoDNA"
    )
  )
## Warning: Removed 22 rows containing missing values (geom_point).
# No plot is passed, so ggsave() writes the most recently displayed plot.
ggsave(
  "Bacteria-AllSamples.TIFF",
  width = 9, height = 3, units = "in", dpi = 600
)
## Warning: Removed 22 rows containing missing values (geom_point).
# Inoculated milk only: raw values per kit, hollow shapes by spike set,
# dodged side by side without jitter.
ggplot(
  TotalBacterialDNA.InnOnly,
  aes(VariableKit, LogCopiespermLofMilk, shape = factor(SpikeSet))
) +
  scale_shape_discrete(solid = FALSE) +
  geom_point(
    aes(colour = VariableKit),
    size = 2, stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1)
  ) +
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Total Bacteria DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA",
      "ZymoDNA"
    )
  )
# Plot overlaying model estimates to raw data
# Raw observations: the columns are selected by name instead of by position
# (previously c(25, 42, 4)) so the subset does not silently change when
# columns are added to or reordered in TotalBacterialDNA.InnOnly.
# NOTE(review): assumes positions 25, 42 and 4 were exactly these three
# columns - the plots built from this object map only these names; confirm.
mod_df1_Bacteria.rawdata <- TotalBacterialDNA.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]
# Model estimates: per-kit marginal means and confidence limits.
mod_df2_Bacteria.best.model <- emmeans(
  mod.Bacteria.best, ~ VariableKit,
  mode = "df.error"
) %>%
  summary() %>%
  data.frame()
# Jittered raw values (hollow shapes by spike set) with the estimates, their
# limits and the CLD letters; the letters are shifted down by 3 units.
ggplot() +
  geom_jitter(
    data = mod_df1_Bacteria.rawdata,
    aes(
      x = VariableKit, y = LogCopiespermLofMilk,
      color = VariableKit, shape = SpikeSet
    ),
    size = 2, stroke = 1, width = .2
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  geom_errorbar(
    data = mod_df2_Bacteria.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Bacteria.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bacteria.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = -3, nudge_x = -0.05, fontface = "bold"
  ) +
  #ylim(3.5, 6.5)+
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Bacteria DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA",
      "ZymoDNA"
    )
  )
# No plot is passed, so ggsave() writes the most recently displayed plot.
ggsave(
  "Bacteria-Model-Jitter.TIFF",
  width = 7.5, height = 3.5, units = "in", dpi = 600
)
# Same overlay as the jitter figure, but the raw points are dodged by spike
# set instead of jittered, and the CLD letters are shifted down by 2 units.
ggplot() +
  geom_point(
    data = mod_df1_Bacteria.rawdata,
    aes(
      x = VariableKit, y = LogCopiespermLofMilk,
      color = VariableKit, shape = SpikeSet
    ),
    size = 2, stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = .5)
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  geom_errorbar(
    data = mod_df2_Bacteria.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Bacteria.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bacteria.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = -2, nudge_x = -0.05, fontface = "bold"
  ) +
  #ylim(3.5, 6.5)+
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Bacteria DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  scale_x_discrete(
    limits = c(
      "COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA",
      "ZymoDNA"
    )
  )
# No plot is passed, so ggsave() writes the most recently displayed plot.
ggsave(
  "Bacteria-Model.TIFF",
  width = 7.5, height = 3.5, units = "in", dpi = 600
)
Bacillus
#Data File: CleanDNAprepData1.18.19
library(ggplot2)
library(dplyr)
library(emmeans)
library(multcompView)
#Filter Subset from Sample Data
# Rows of the Bacillus wiedmannii assay only.
Bacillus <- SampleData %>%
  filter(Assay == "Bacillus wiedmannii")
dim(Bacillus)
## [1] 240 42
#Summary Statistics
# Mean, SD, number of missing values and row count per kit and sample type.
Bacillus.summary <- Bacillus %>%
  group_by(VariableKit, VariableSampleType) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableKit' (override with `.groups` argument)
# Tab-separated export of the summary table.
write.table(Bacillus.summary, "Bacillus.summary.txt", sep = "\t")
#Plot Raw Means and Standard Deviations
# Box plots of log copy number per sample type, one box per kit.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
# The mapping previously also carried `z = VariableKit` and
# `ylab = "Copy Numbers"`. Neither is an aesthetic used by geom_boxplot, and
# the axis title is set by ylab() below, so both were dead and are dropped.
ggplot(
  data = Bacillus,
  mapping = aes(
    x = VariableSampleType, y = LogCopiespermLofMilk, color = VariableKit
  )
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_boxplot(lwd = 1) +
  theme_bw() +
  ggtitle("Bacillus Copy Numbers") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  # NOTE(review): SampleData is filtered to exclude "NP40InoculatedMilk" when
  # it is created, so that position on the axis is presumably empty - confirm
  # whether it should stay in `limits`.
  scale_x_discrete(
    limits = c(
      "UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk",
      "MockCommunity", "NoTemplateControl"
    )
  )
## Warning: Removed 87 rows containing non-finite values (stat_boxplot).
# Jittered raw values per sample type: colour = kit, shape = spike set.
ggplot(
  data = Bacillus,
  mapping = aes(
    x = VariableSampleType, y = LogCopiespermLofMilk,
    color = VariableKit, shape = SpikeSet
  )
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_jitter(width = 0.25) +
  ggtitle("Bacillus Copy Numbers") +
  theme_bw() +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c(
      "UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk",
      "MockCommunity", "NoTemplateControl"
    )
  )
## Warning: Removed 87 rows containing missing values (geom_point).
# Jittered raw values per sample type: colour = spike set, shape = kit.
ggplot(
  data = Bacillus,
  mapping = aes(
    x = VariableSampleType, y = LogCopiespermLofMilk,
    color = SpikeSet, shape = VariableKit
  )
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_jitter(width = 0.35) +
  # There are seven kits but ggplot2's default shape palette stops at six, so
  # the points of one kit were dropped from the figure. The first six codes
  # are the default palette; 4 (a cross) is added for the seventh kit.
  scale_shape_manual(values = c(16, 17, 15, 3, 7, 8, 4)) +
  ggtitle("Bacillus Copy Numbers") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c(
      "UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk",
      "MockCommunity", "NoTemplateControl"
    )
  )
## Warning: Removed 87 rows containing missing values (geom_point).
# Inoculated Milk Data
# Inoculated-milk rows whose log copy number is above 0.001.
Bacillus.InnOnly <- Bacillus %>%
  filter(
    VariableSampleType == "InoculatedMilk",
    LogCopiespermLofMilk > 0.001
  )
# Mean, SD, number of missing values and row count per spike set and kit.
Bacillus.InnOnly %>%
  group_by(VariableSampleType, VariableSpikeSet, VariableKit) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableSampleType', 'VariableSpikeSet' (override with `.groups` argument)
## VariableSampleType VariableSpikeSet VariableKit mean_LogCopiespermLofMilk
## 1 InoculatedMilk First COREDNA 6.126799
## 2 InoculatedMilk First EZFood 4.199089
## 3 InoculatedMilk First Mastitis 6.541652
## 4 InoculatedMilk First Pfood 5.417196
## 5 InoculatedMilk First PSoilP 5.311014
## 6 InoculatedMilk First PviralDNA 5.518817
## 7 InoculatedMilk First ZymoDNA 6.028139
## 8 InoculatedMilk Second COREDNA 6.433043
## 9 InoculatedMilk Second Mastitis 6.469291
## 10 InoculatedMilk Second Pfood 5.662830
## 11 InoculatedMilk Second PSoilP 5.317932
## 12 InoculatedMilk Second PviralDNA 6.091386
## 13 InoculatedMilk Second ZymoDNA 5.641826
## 14 InoculatedMilk Third COREDNA 7.019606
## 15 InoculatedMilk Third EZFood 4.500444
## 16 InoculatedMilk Third Mastitis 7.012577
## 17 InoculatedMilk Third Pfood 6.400803
## 18 InoculatedMilk Third PSoilP 6.186386
## 19 InoculatedMilk Third PviralDNA 6.674293
## 20 InoculatedMilk Third ZymoDNA 6.397461
## st_dev n_missing n_total
## 1 0.05946479 0 6
## 2 0.99412039 0 5
## 3 0.02719433 0 6
## 4 0.92199266 0 6
## 5 0.18316303 0 6
## 6 0.08786240 0 6
## 7 0.03599635 0 6
## 8 0.09141025 0 6
## 9 0.01607210 0 6
## 10 0.08101734 0 6
## 11 0.17244727 0 6
## 12 0.08779476 0 6
## 13 0.02664997 0 6
## 14 0.02640080 0 6
## 15 0.18655080 0 3
## 16 0.04008723 0 6
## 17 0.03383823 0 5
## 18 0.12209311 0 6
## 19 0.07478131 0 6
## 20 0.09433067 0 6
#3 linear models were compared: including SpikeSet only, qPCRefficiency only, and both as covariates. Best model fit was used as the final model.
# Candidate 1: kit plus spike set.
m_Bacillus.LogCopiespermLofMilk1 <- lm(
  LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = Bacillus.InnOnly
)
summary(m_Bacillus.LogCopiespermLofMilk1)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = Bacillus.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.00590 -0.12365 0.02376 0.14482 1.18301
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.24938 0.08909 70.144 < 2e-16 ***
## VariableKitEZFood -2.21559 0.14237 -15.562 < 2e-16 ***
## VariableKitMastitis 0.14802 0.11004 1.345 0.181452
## VariableKitPfood -0.70594 0.11168 -6.321 6.26e-09 ***
## VariableKitPSoilP -0.92137 0.11004 -8.373 2.50e-13 ***
## VariableKitPviralDNA -0.43165 0.11004 -3.923 0.000156 ***
## VariableKitZymoDNA -0.50401 0.11004 -4.580 1.27e-05 ***
## SpikeSetSecond 0.08916 0.07689 1.160 0.248844
## SpikeSetThird 0.74214 0.07455 9.955 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3301 on 106 degrees of freedom
## Multiple R-squared: 0.8191, Adjusted R-squared: 0.8054
## F-statistic: 59.98 on 8 and 106 DF, p-value: < 2.2e-16
# Candidate 2: kit plus qPCR efficiency (no spike set term).
m_Bacillus.LogCopiespermLofMilk2 <- lm(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency,
  data = Bacillus.InnOnly
)
summary(m_Bacillus.LogCopiespermLofMilk2)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency,
## data = Bacillus.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.25228 -0.24690 -0.04217 0.38180 0.90585
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.5051 0.3725 17.466 < 2e-16 ***
## VariableKitEZFood -2.2118 0.2066 -10.704 < 2e-16 ***
## VariableKitMastitis 0.1484 0.1586 0.936 0.35141
## VariableKitPfood -0.7329 0.1609 -4.555 1.39e-05 ***
## VariableKitPSoilP -0.9210 0.1586 -5.807 6.60e-08 ***
## VariableKitPviralDNA -0.4316 0.1585 -2.724 0.00753 **
## VariableKitZymoDNA -0.5036 0.1586 -3.175 0.00196 **
## qPCRefficiency 0.0284 0.4722 0.060 0.95215
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4753 on 107 degrees of freedom
## Multiple R-squared: 0.6214, Adjusted R-squared: 0.5966
## F-statistic: 25.08 on 7 and 107 DF, p-value: < 2.2e-16
# Candidate 3: kit plus both qPCR efficiency and spike set.
m_Bacillus.LogCopiespermLofMilk3 <- lm(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Bacillus.InnOnly
)
summary(m_Bacillus.LogCopiespermLofMilk3)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency +
## SpikeSet, data = Bacillus.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.09840 -0.07968 0.02759 0.11999 1.13063
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.986714 1.969535 6.594 1.77e-09 ***
## VariableKitEZFood -2.411255 0.147215 -16.379 < 2e-16 ***
## VariableKitMastitis -0.004911 0.113983 -0.043 0.965719
## VariableKitPfood -0.861498 0.115718 -7.445 2.81e-11 ***
## VariableKitPSoilP -1.074306 0.113983 -9.425 1.20e-15 ***
## VariableKitPviralDNA -0.431651 0.104867 -4.116 7.69e-05 ***
## VariableKitZymoDNA -0.656942 0.113983 -5.763 8.33e-08 ***
## qPCRefficiency -10.473575 3.058912 -3.424 0.000881 ***
## SpikeSetSecond 2.526123 0.715502 3.531 0.000617 ***
## SpikeSetThird 1.727832 0.296519 5.827 6.25e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3146 on 105 degrees of freedom
## Multiple R-squared: 0.8372, Adjusted R-squared: 0.8233
## F-statistic: 60.02 on 9 and 105 DF, p-value: < 2.2e-16
# Fit of model with both qPCRefficiency and SpikeSet is slightly better than fit of model with SpikeSet only
# Sequential F tests across the three candidate lms, in the order given.
# NOTE(review): models 1 and 2 are not nested (SpikeSet vs qPCRefficiency),
# which is why the second row of the table has a negative Df; the sequential
# F tests are presumably only meaningful for nested pairs - consider
# comparing model 1 with model 3 and model 2 with model 3 separately.
anova(m_Bacillus.LogCopiespermLofMilk1, m_Bacillus.LogCopiespermLofMilk2, m_Bacillus.LogCopiespermLofMilk3)
## Analysis of Variance Table
##
## Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
## Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency
## Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 106 11.553
## 2 107 24.177 -1 -12.624 127.549 < 2.2e-16 ***
## 3 105 10.392 2 13.784 69.636 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# AIC of each candidate lm; all three are fitted to Bacillus.InnOnly.
AIC(m_Bacillus.LogCopiespermLofMilk1)
## [1] 82.0835
AIC(m_Bacillus.LogCopiespermLofMilk2)
## [1] 165.0082
AIC(m_Bacillus.LogCopiespermLofMilk3)
## [1] 71.91107
# Final model chosen:
# Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
# (it has the lowest of the three AIC values printed above)
library(lme4)
library(lmerTest)
# Mixed-model alternative: kit as fixed effect, random intercept per spike set.
model1 <- lmer(
  LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet),
  data = Bacillus.InnOnly,
  REML = TRUE
)
summary(model1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
## Data: Bacillus.InnOnly
##
## REML criterion at convergence: 94.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.0884 -0.3868 0.0730 0.4565 3.5691
##
## Random effects:
## Groups Name Variance Std.Dev.
## SpikeSet (Intercept) 0.1614 0.4018
## Residual 0.1090 0.3301
## Number of obs: 115, groups: SpikeSet, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.5265 0.2447 2.3902 26.675 0.000514 ***
## VariableKitEZFood -2.2154 0.1423 106.1124 -15.565 < 2e-16 ***
## VariableKitMastitis 0.1480 0.1100 106.0017 1.345 0.181451
## VariableKitPfood -0.7064 0.1117 106.0037 -6.325 6.14e-09 ***
## VariableKitPSoilP -0.9214 0.1100 106.0017 -8.373 2.50e-13 ***
## VariableKitPviralDNA -0.4317 0.1100 106.0017 -3.923 0.000156 ***
## VariableKitZymoDNA -0.5040 0.1100 106.0017 -4.580 1.27e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VarblKtEZFd -0.174
## VrblKtMstts -0.225 0.387
## VariblKtPfd -0.222 0.380 0.493
## VarblKtPSlP -0.225 0.387 0.500 0.493
## VrblKtPvDNA -0.225 0.387 0.500 0.493 0.500
## VrblKtZyDNA -0.225 0.387 0.500 0.493 0.500 0.500
# As model1, with qPCR efficiency added as a fixed effect.
model2 <- lmer(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet),
  data = Bacillus.InnOnly,
  REML = TRUE
)
summary(model2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
## Data: Bacillus.InnOnly
##
## REML criterion at convergence: 83.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.6253 -0.2423 0.0675 0.3670 3.6148
##
## Random effects:
## Groups Name Variance Std.Dev.
## SpikeSet (Intercept) 1.32448 1.1509
## Residual 0.09911 0.3148
## Number of obs: 115, groups: SpikeSet, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 13.12759 2.21117 23.01830 5.937 4.70e-06 ***
## VariableKitEZFood -2.38084 0.14566 106.96631 -16.346 < 2e-16 ***
## VariableKitMastitis 0.01988 0.11263 106.85800 0.177 0.86023
## VariableKitPfood -0.83631 0.11434 106.84974 -7.314 5.00e-11 ***
## VariableKitPSoilP -1.04951 0.11263 106.85800 -9.318 1.80e-15 ***
## VariableKitPviralDNA -0.43165 0.10494 104.68351 -4.113 7.79e-05 ***
## VariableKitZymoDNA -0.63215 0.11263 106.85800 -5.612 1.59e-07 ***
## qPCRefficiency -8.77574 2.80201 41.55916 -3.132 0.00318 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA
## VarblKtEZFd -0.362
## VrblKtMstts -0.368 0.467
## VariblKtPfd -0.369 0.462 0.560
## VarblKtPSlP -0.368 0.467 0.566 0.560
## VrblKtPvDNA -0.024 0.360 0.466 0.459 0.466
## VrblKtZyDNA -0.368 0.467 0.566 0.560 0.566 0.466
## qPCReffcncy -0.953 0.362 0.363 0.364 0.363 0.000 0.363
# Compare the mixed models without (model1) and with (model2) qPCRefficiency;
# anova() refits both with ML before comparing them.
anova(model1, model2)
## refitting model(s) with ML (instead of REML)
## Data: Bacillus.InnOnly
## Models:
## model1: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
## model2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model1 9 94.236 118.94 -38.118 76.236
## model2 10 90.603 118.05 -35.301 70.603 5.6334 1 0.01762 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC (model1)
## [1] 112.5384
AIC (model2)
## [1] 103.7505
AIC(m_Bacillus.LogCopiespermLofMilk3) #still has better fit
## [1] 71.91107
# Fixed-effects linear model: kit, qPCR efficiency and spike set as predictors
# of log copies per mL of milk (inoculated Bacillus samples only).
m_Bacillus.LogCopiespermLofMilk <- lm(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Bacillus.InnOnly
)
summary(m_Bacillus.LogCopiespermLofMilk)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency +
## SpikeSet, data = Bacillus.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.09840 -0.07968 0.02759 0.11999 1.13063
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.986714 1.969535 6.594 1.77e-09 ***
## VariableKitEZFood -2.411255 0.147215 -16.379 < 2e-16 ***
## VariableKitMastitis -0.004911 0.113983 -0.043 0.965719
## VariableKitPfood -0.861498 0.115718 -7.445 2.81e-11 ***
## VariableKitPSoilP -1.074306 0.113983 -9.425 1.20e-15 ***
## VariableKitPviralDNA -0.431651 0.104867 -4.116 7.69e-05 ***
## VariableKitZymoDNA -0.656942 0.113983 -5.763 8.33e-08 ***
## qPCRefficiency -10.473575 3.058912 -3.424 0.000881 ***
## SpikeSetSecond 2.526123 0.715502 3.531 0.000617 ***
## SpikeSetThird 1.727832 0.296519 5.827 6.25e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3146 on 105 degrees of freedom
## Multiple R-squared: 0.8372, Adjusted R-squared: 0.8233
## F-statistic: 60.02 on 9 and 105 DF, p-value: < 2.2e-16
# Residuals vs. predicted values, base graphics
plot(
  x = predict(m_Bacillus.LogCopiespermLofMilk),
  y = resid(m_Bacillus.LogCopiespermLofMilk)
)

# Same diagnostic with ggplot2, coloured by kit; horizontal guides at +/- 1
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
ggplot(
  m_Bacillus.LogCopiespermLofMilk,
  aes(
    x = predict(m_Bacillus.LogCopiespermLofMilk),
    y = resid(m_Bacillus.LogCopiespermLofMilk),
    color = VariableKit
  )
) +
  geom_point() +
  geom_hline(yintercept = 1) +
  geom_hline(yintercept = -1) +
  scale_color_manual(values = Colors) +
  ggtitle("Bacillus Innoculated Only - Model Fit - Residuals vs Predicted") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  )

# Normal Q-Q plot of the residuals with reference line
qqnorm(resid(m_Bacillus.LogCopiespermLofMilk))
qqline(resid(m_Bacillus.LogCopiespermLofMilk))

summary(m_Bacillus.LogCopiespermLofMilk)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency +
## SpikeSet, data = Bacillus.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.09840 -0.07968 0.02759 0.11999 1.13063
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.986714 1.969535 6.594 1.77e-09 ***
## VariableKitEZFood -2.411255 0.147215 -16.379 < 2e-16 ***
## VariableKitMastitis -0.004911 0.113983 -0.043 0.965719
## VariableKitPfood -0.861498 0.115718 -7.445 2.81e-11 ***
## VariableKitPSoilP -1.074306 0.113983 -9.425 1.20e-15 ***
## VariableKitPviralDNA -0.431651 0.104867 -4.116 7.69e-05 ***
## VariableKitZymoDNA -0.656942 0.113983 -5.763 8.33e-08 ***
## qPCRefficiency -10.473575 3.058912 -3.424 0.000881 ***
## SpikeSetSecond 2.526123 0.715502 3.531 0.000617 ***
## SpikeSetThird 1.727832 0.296519 5.827 6.25e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3146 on 105 degrees of freedom
## Multiple R-squared: 0.8372, Adjusted R-squared: 0.8233
## F-statistic: 60.02 on 9 and 105 DF, p-value: < 2.2e-16
# A few large residuals (|residual| > 1) were identified; most belong to EZFood.
# Store the lm residuals on the data, then count the large ones per kit.
Bacillus.InnOnly$resid <- resid(m_Bacillus.LogCopiespermLofMilk)
Bacillus.InnOnly %>%
  filter(abs(resid) > 1) %>%
  select(VariableKit, resid) %>%
  group_by(VariableKit) %>%
  summarize(n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
## VariableKit n
## <chr> <int>
## 1 EZFood 2
## 2 Pfood 1
# Tukey-adjusted pairwise comparisons of the kit estimates from the lm
m_Bacillus.LogCopiespermLofMilk_emmeans <- emmeans(
  m_Bacillus.LogCopiespermLofMilk,
  pairwise ~ VariableKit
)
# Compact letter display: kits sharing a letter are not significantly different.
# NOTE: CLD() emits a deprecation warning in this emmeans version.
m_Bacillus.LogCopiespermLofMilk_cld <- CLD(
  m_Bacillus.LogCopiespermLofMilk_emmeans[["emmeans"]],
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
m_Bacillus.LogCopiespermLofMilk_cld
## VariableKit emmean SE df lower.CL upper.CL .group
## EZFood 4.28 0.1141 105 4.05 4.50 A
## PSoilP 5.61 0.0742 105 5.47 5.76 B
## Pfood 5.83 0.0764 105 5.67 5.98 BC
## ZymoDNA 6.03 0.0742 105 5.88 6.18 CD
## PviralDNA 6.26 0.0878 105 6.08 6.43 D
## Mastitis 6.68 0.0742 105 6.54 6.83 E
## COREDNA 6.69 0.0878 105 6.51 6.86 E
##
## Results are averaged over the levels of: SpikeSet
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 7 estimates
## significance level used: alpha = 0.05
# Estimated marginal means per kit as a plain data frame
# (convenient for plotting with other software)
data.frame(summary(emmeans(m_Bacillus.LogCopiespermLofMilk, ~ VariableKit)))
## VariableKit emmean SE df lower.CL upper.CL
## 1 COREDNA 6.687205 0.08776095 105 6.513191 6.861219
## 2 EZFood 4.275950 0.11407101 105 4.049768 4.502131
## 3 Mastitis 6.682295 0.07418720 105 6.535195 6.829394
## 4 Pfood 5.825707 0.07635634 105 5.674307 5.977108
## 5 PSoilP 5.612899 0.07418720 105 5.465799 5.759998
## 6 PviralDNA 6.255554 0.08776095 105 6.081540 6.429568
## 7 ZymoDNA 6.030263 0.07418720 105 5.883164 6.177363
# Plot the lm estimated marginal means per kit with confidence limits and
# the compact-letter-display groups next to each point
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
ggplot(
  data.frame(summary(emmeans(m_Bacillus.LogCopiespermLofMilk, ~VariableKit))),
  aes(x = VariableKit, y = emmean, color = VariableKit)
) +
  geom_point() +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.5) +
  geom_text(
    data = data.frame(m_Bacillus.LogCopiespermLofMilk_cld),
    aes(x = VariableKit, label = `.group`),
    hjust = -.1
  ) +
  labs(y = "Estimated Marginal Means") +
  ggtitle("Bacillus Copy Numbers - Inoculated Milk Only") +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
# from https://cran.r-project.org/web/packages/emmeans/vignettes/FAQs.html#contents
library(nlme)
# Final model chosen:
# Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
# m_Bacillus.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Bacillus.InnOnly )
#
# Refit the same mean structure with gls, allowing a separate residual
# standard deviation for each kit (varIdent) instead of one common variance.
mod.Bacillus <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Bacillus.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod.Bacillus)
## Generalized least squares fit by REML
## Model: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
## Data: Bacillus.InnOnly
## AIC BIC logLik
## -48.92209 -3.804763 41.46104
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | VariableKit
## Parameter estimates:
## COREDNA EZFood Mastitis Pfood PSoilP PviralDNA ZymoDNA
## 1.0000000 10.8226076 0.4338227 7.9479602 3.2866606 1.7999703 1.9815334
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 13.157805 0.4998031 26.325978 0.0000
## VariableKitEZFood -2.418889 0.2708292 -8.931416 0.0000
## VariableKitMastitis -0.007651 0.0214136 -0.357305 0.7216
## VariableKitPfood -0.868575 0.1374938 -6.317193 0.0000
## VariableKitPSoilP -1.077047 0.0582532 -18.489047 0.0000
## VariableKitPviralDNA -0.431651 0.0342402 -12.606569 0.0000
## VariableKitZymoDNA -0.659682 0.0386293 -17.077236 0.0000
## qPCRefficiency -10.661269 0.7808058 -13.654188 0.0000
## SpikeSetSecond 2.542006 0.1867661 13.610639 0.0000
## SpikeSetThird 1.622224 0.0790107 20.531702 0.0000
##
## Correlation:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA qPCRff
## VariableKitEZFood -0.051
## VariableKitMastitis -0.558 0.074
## VariableKitPfood -0.088 0.012 0.139
## VariableKitPSoilP -0.205 0.027 0.326 0.051
## VariableKitPviralDNA -0.016 0.030 0.377 0.059 0.139
## VariableKitZymoDNA -0.309 0.041 0.491 0.077 0.181 0.209
## qPCRefficiency -0.999 0.049 0.532 0.084 0.196 0.000 0.295
## SpikeSetSecond 0.995 -0.047 -0.531 -0.084 -0.195 0.000 -0.294 -0.997
## SpikeSetThird 0.977 -0.047 -0.522 -0.082 -0.192 0.000 -0.289 -0.981
## SpkStS
## VariableKitEZFood
## VariableKitMastitis
## VariableKitPfood
## VariableKitPSoilP
## VariableKitPviralDNA
## VariableKitZymoDNA
## qPCRefficiency
## SpikeSetSecond
## SpikeSetThird 0.986
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -3.8274028 -0.6343972 0.1141199 0.6254420 1.6446439
##
## Residual standard error: 0.0705496
## Degrees of freedom: 115 total; 105 residual
AIC(m_Bacillus.LogCopiespermLofMilk3)
## [1] 71.91107
AIC(mod.Bacillus)
## [1] -48.92209
# Simpler alternative: kit as the only predictor, still with a separate
# residual standard deviation per kit
mod3.Bacillus <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit,
  data = Bacillus.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod3.Bacillus)
## Generalized least squares fit by REML
## Model: LogCopiespermLofMilk ~ VariableKit
## Data: Bacillus.InnOnly
## AIC BIC logLik
## 167.8347 205.3845 -69.91734
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | VariableKit
## Parameter estimates:
## COREDNA EZFood Mastitis Pfood PSoilP PviralDNA ZymoDNA
## 1.0000000 2.0050656 0.6461050 1.7238368 1.1640521 1.2739433 0.8354346
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 6.526483 0.09097939 71.73584 0.0000
## VariableKitEZFood -2.214386 0.28835800 -7.67929 0.0000
## VariableKitMastitis 0.148024 0.10831713 1.36658 0.1746
## VariableKitPfood -0.733296 0.18525902 -3.95822 0.0001
## VariableKitPSoilP -0.921372 0.13961757 -6.59925 0.0000
## VariableKitPviralDNA -0.431651 0.14734537 -2.92952 0.0041
## VariableKitZymoDNA -0.504007 0.11855110 -4.25139 0.0000
##
## Correlation:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VariableKitEZFood -0.316
## VariableKitMastitis -0.840 0.265
## VariableKitPfood -0.491 0.155 0.412
## VariableKitPSoilP -0.652 0.206 0.547 0.320
## VariableKitPviralDNA -0.617 0.195 0.519 0.303 0.402
## VariableKitZymoDNA -0.767 0.242 0.645 0.377 0.500 0.474
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -3.38996911 -0.80202011 -0.09719026 0.97395001 1.62044688
##
## Residual standard error: 0.3859929
## Degrees of freedom: 115 total; 108 residual
AIC(m_Bacillus.LogCopiespermLofMilk3)
## [1] 71.91107
AIC(mod.Bacillus)
## [1] -48.92209
AIC(mod3.Bacillus) #mod.Bacillus is best model (including qPCRefficiency and SpikeSet)
## [1] 167.8347
# mod.Bacillus (heteroscedastic; includes qPCRefficiency and SpikeSet) has the
# lowest AIC of the compared models, so keep it as the final model.
mod.Bacillus.best <- mod.Bacillus

# Tukey-adjusted pairwise comparisons of the kit estimates
mod.Bacillus_emmeans <- emmeans(mod.Bacillus, pairwise ~ VariableKit)

# Compact letter display, sorted, with the pairwise comparison table attached
mod.Bacillus_cld <- CLD(
  mod.Bacillus_emmeans[["emmeans"]],
  sort = TRUE,
  details = TRUE,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Bacillus_cld_letters <- CLD(mod.Bacillus_emmeans$emmeans, Letters=LETTERS)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Bacillus_cld
## $emmeans
## VariableKit emmean SE df lower.CL upper.CL .group
## EZFood 4.271 0.269995 7.01 3.633 4.910 A
## PSoilP 5.613 0.054656 16.72 5.498 5.728 B
## Pfood 5.822 0.135998 15.98 5.533 6.110 BCD
## ZymoDNA 6.030 0.032955 15.73 5.960 6.100 C
## PviralDNA 6.258 0.032240 14.08 6.189 6.328 D
## Mastitis 6.682 0.007237 13.38 6.667 6.698 E
## COREDNA 6.690 0.020496 13.76 6.646 6.734 E
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: satterthwaite
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 7 estimates
## significance level used: alpha = 0.05
##
## $comparisons
## contrast estimate SE df t.ratio p.value
## PSoilP - EZFood 1.34184 0.2755 7.59 4.871 0.0151
## Pfood - EZFood 1.55031 0.3023 10.71 5.128 0.0047
## Pfood - PSoilP 0.20847 0.1466 21.02 1.422 0.7842
## ZymoDNA - EZFood 1.75921 0.2720 7.22 6.468 0.0033
## ZymoDNA - PSoilP 0.41736 0.0638 27.30 6.540 <.0001
## ZymoDNA - Pfood 0.20889 0.1399 17.86 1.493 0.7454
## PviralDNA - EZFood 1.98724 0.2720 7.21 7.307 0.0015
## PviralDNA - PSoilP 0.64540 0.0633 26.35 10.188 <.0001
## PviralDNA - Pfood 0.43692 0.1397 17.75 3.127 0.0712
## PviralDNA - ZymoDNA 0.22803 0.0460 29.39 4.962 0.0005
## Mastitis - EZFood 2.41124 0.2701 7.02 8.927 0.0005
## Mastitis - PSoilP 1.06940 0.0551 17.32 19.399 <.0001
## Mastitis - Pfood 0.86092 0.1362 16.07 6.322 0.0002
## Mastitis - ZymoDNA 0.65203 0.0337 17.37 19.330 <.0001
## Mastitis - PviralDNA 0.42400 0.0328 15.08 12.914 <.0001
## COREDNA - EZFood 2.41889 0.2708 7.09 8.931 0.0005
## COREDNA - PSoilP 1.07705 0.0583 21.06 18.489 <.0001
## COREDNA - Pfood 0.86857 0.1375 16.69 6.317 0.0001
## COREDNA - ZymoDNA 0.65968 0.0386 25.28 17.077 <.0001
## COREDNA - PviralDNA 0.43165 0.0342 20.74 12.607 <.0001
## COREDNA - Mastitis 0.00765 0.0214 16.70 0.357 0.9998
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: satterthwaite
## P value adjustment: tukey method for comparing a family of 7 estimates
# Estimated marginal means per kit from the gls model as a plain data frame
# (convenient for plotting with other software)
data.frame(summary(emmeans(mod.Bacillus, ~ VariableKit)))
## VariableKit emmean SE df lower.CL upper.CL
## 1 COREDNA 6.690085 0.020495821 13.764217 6.646055 6.734115
## 2 EZFood 4.271197 0.269995430 7.006771 3.632884 4.909509
## 3 Mastitis 6.682434 0.007237233 13.383434 6.666844 6.698024
## 4 Pfood 5.821511 0.135997649 15.983425 5.533184 6.109837
## 5 PSoilP 5.613038 0.054655974 16.724284 5.497579 5.728497
## 6 PviralDNA 6.258434 0.032240343 14.084091 6.189324 6.327544
## 7 ZymoDNA 6.030403 0.032955438 15.730850 5.960443 6.100363
# Summary of the kit estimates including t ratios and p values (infer = TRUE)
emmeans(mod.Bacillus, ~ VariableKit) %>%
  summary(infer = TRUE)
## VariableKit emmean SE df lower.CL upper.CL t.ratio p.value
## COREDNA 6.690 0.020496 13.76 6.646 6.734 326.412 <.0001
## EZFood 4.271 0.269995 7.01 3.633 4.910 15.819 <.0001
## Mastitis 6.682 0.007237 13.38 6.667 6.698 923.341 <.0001
## Pfood 5.822 0.135998 15.98 5.533 6.110 42.806 <.0001
## PSoilP 5.613 0.054656 16.72 5.498 5.728 102.698 <.0001
## PviralDNA 6.258 0.032240 14.08 6.189 6.328 194.118 <.0001
## ZymoDNA 6.030 0.032955 15.73 5.960 6.100 182.987 <.0001
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: satterthwaite
## Confidence level used: 0.95
# Plot overlaying model estimates to raw data.
# Select the plotted columns by name rather than by position: positional
# indices silently pick the wrong columns if the data frame layout changes
# (a `resid` column has already been appended to Bacillus.InnOnly above).
mod.Bacillus_df1_Bacillus.rawdata <- Bacillus.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]
# Estimated marginal means and confidence limits per kit from the gls model
mod.Bacillus_df2_Bacillus.model <- emmeans(mod.Bacillus, ~VariableKit) %>%
  summary() %>%
  data.frame()
ggplot() +
  geom_jitter(
    data = mod.Bacillus_df1_Bacillus.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk)
  ) +
  geom_point(
    data = mod.Bacillus_df2_Bacillus.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_errorbar(
    data = mod.Bacillus_df2_Bacillus.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  )
# Making the plot pretty: raw points coloured by kit and shaped by spike set,
# gls estimates with confidence limits, and compact-letter-display groups.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
ggplot() +
  geom_jitter(
    data = mod.Bacillus_df1_Bacillus.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
  ) +
  geom_errorbar(
    data = mod.Bacillus_df2_Bacillus.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  ) +
  geom_point(
    data = mod.Bacillus_df2_Bacillus.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bacillus_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 0.2, nudge_x = -0.05, fontface = "bold"
  ) +
  # The colour scale is added once only; adding it twice made ggplot2 report
  # "Scale for 'colour' is already present" and replace the first one.
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  ylim(3.0, 8.0) +
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Bacillus Copy Numbers - Inoculated Milk Only - Not assuming homoscedasticity ") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
library(emmeans)
# Tukey-adjusted pairwise comparisons of kit estimates for the final model,
# using the "df.error" degrees-of-freedom method
mod.Bacillus.best_emmeans <- emmeans(
  mod.Bacillus.best,
  pairwise ~ VariableKit,
  mode = "df.error"
)
# Compact letter display, sorted, with the pairwise comparison table attached
mod.Bacillus.best_cld <- CLD(
  mod.Bacillus.best_emmeans[["emmeans"]],
  sort = TRUE,
  details = TRUE,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Bacillus.best_cld_letters <- CLD(mod.Bacillus.best_emmeans$emmeans, Letters=LETTERS)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Bacillus.best_cld_letters
## VariableKit emmean SE df lower.CL upper.CL .group
## EZFood 4.271 0.269995 98 3.735 4.807 A
## PSoilP 5.613 0.054656 98 5.505 5.722 B
## Pfood 5.822 0.135998 98 5.552 6.091 BC
## ZymoDNA 6.030 0.032955 98 5.965 6.096 C
## PviralDNA 6.258 0.032240 98 6.194 6.322 D
## Mastitis 6.682 0.007237 98 6.668 6.697 E
## COREDNA 6.690 0.020496 98 6.649 6.731 E
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: df.error
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 7 estimates
## significance level used: alpha = 0.05
# Estimated marginal means per kit from the final model (df.error method)
# as a plain data frame, convenient for plotting with other software
data.frame(summary(emmeans(mod.Bacillus.best, ~ VariableKit, mode = "df.error")))
## VariableKit emmean SE df lower.CL upper.CL
## 1 COREDNA 6.690085 0.020495821 98 6.649412 6.730759
## 2 EZFood 4.271197 0.269995430 98 3.735400 4.806994
## 3 Mastitis 6.682434 0.007237233 98 6.668072 6.696796
## 4 Pfood 5.821511 0.135997649 98 5.551628 6.091394
## 5 PSoilP 5.613038 0.054655974 98 5.504575 5.721501
## 6 PviralDNA 6.258434 0.032240343 98 6.194454 6.322414
## 7 ZymoDNA 6.030403 0.032955438 98 5.965004 6.095802
# Summary of the final-model kit estimates with t ratios and p values
emmeans(mod.Bacillus.best, ~ VariableKit, mode = "df.error") %>%
  summary(infer = TRUE)
## VariableKit emmean SE df lower.CL upper.CL t.ratio p.value
## COREDNA 6.690 0.020496 98 6.649 6.731 326.412 <.0001
## EZFood 4.271 0.269995 98 3.735 4.807 15.819 <.0001
## Mastitis 6.682 0.007237 98 6.668 6.697 923.341 <.0001
## Pfood 5.822 0.135998 98 5.552 6.091 42.806 <.0001
## PSoilP 5.613 0.054656 98 5.505 5.722 102.698 <.0001
## PviralDNA 6.258 0.032240 98 6.194 6.322 194.118 <.0001
## ZymoDNA 6.030 0.032955 98 5.965 6.096 182.987 <.0001
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: df.error
## Confidence level used: 0.95
# Plot overlaying model estimates to raw data.
# Select the plotted columns by name rather than by position: positional
# indices silently pick the wrong columns if the data frame layout changes
# (a `resid` column has already been appended to Bacillus.InnOnly above).
mod_df1_Bacillus.rawdata <- Bacillus.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]
# Estimated marginal means and confidence limits per kit from the final model
mod_df2_Bacillus.best.model <- emmeans(mod.Bacillus.best, ~VariableKit, mode = "df.error") %>%
  summary() %>%
  data.frame()
ggplot() +
  geom_jitter(
    data = mod_df1_Bacillus.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk)
  ) +
  geom_point(
    data = mod_df2_Bacillus.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_errorbar(
    data = mod_df2_Bacillus.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  )
# Making the plot pretty: raw points coloured by kit and shaped by spike set,
# final-model estimates with confidence limits, and compact-letter groups.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
ggplot() +
  geom_jitter(
    data = mod_df1_Bacillus.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
  ) +
  geom_errorbar(
    data = mod_df2_Bacillus.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Bacillus.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bacillus.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 0.2, nudge_x = -0.05, fontface = "bold"
  ) +
  # The colour scale is added once only; adding it twice made ggplot2 report
  # "Scale for 'colour' is already present" and replace the first one.
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  # ylim(3.5, 6.5) +
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Bacillus Copy Numbers - Inoculated Milk Only - Not assuming homoscedasticity ") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
The model that does not assume homoscedasticity and includes VariableKit + SpikeSet + qPCRefficiency was chosen.
qPCRefficiency is forced into all final models.
Formula: mod.Bacillus = nlme::gls(LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Bacillus.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod.Bacillus)
-48.92209
Other Models for Reference:
Formula: mod3.Bacillus = nlme::gls(LogCopiespermLofMilk ~ VariableKit, data=Bacillus.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod3.Bacillus)
167.8347
Previously chosen Linear Model that assumed homoscedasticity for reference:
Formula: m_Bacillus.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Bacillus.InnOnly )
AIC(m_Bacillus.LogCopiespermLofMilk3)
71.91107
Manuscript Figures: Bacillus
# Bacillus: milk data and controls (everything except NP40-inoculated milk)
Bacillus.Inn.Ctrl <- filter(Bacillus, VariableSampleType != "NP40InoculatedMilk")
dim(Bacillus.Inn.Ctrl)
## [1] 240 42
# Manuscript figure: Bacillus copy numbers for all sample types and controls,
# one facet per sample type, open symbols dodged by spike set.
# NOTE(review): "Pfood" is "#D2338B" here but "#D2326B" in every other Colors
# definition in this file -- confirm whether the difference is intentional.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2338B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
# Fix the facet order of the sample types
Bacillus.Inn.Ctrl$VariableSampleType <- factor(
  Bacillus.Inn.Ctrl$VariableSampleType,
  levels = c('InoculatedMilk', 'UninoculatedMilk', 'NoTemplateControl', 'MockCommunity')
)
ggplot(
  data = Bacillus.Inn.Ctrl,
  aes(VariableKit, LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
) +
  geom_point(
    aes(colour = VariableKit),
    size = 2, stroke = .5,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1),
    show.legend = FALSE
  ) +
  facet_wrap(vars(VariableSampleType), nrow = 1) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  ylim(0, 9) +
  ylab("Bacillus Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Bacillus DNA Copy Numbers - All Samples and Controls") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  )
## Warning: Removed 87 rows containing missing values (geom_point).
ggsave("Bacillus-AllSamples.TIFF", width = 9, height = 3,units = "in", dpi = 600)
## Warning: Removed 87 rows containing missing values (geom_point).
# Raw Bacillus copy numbers for inoculated milk only: open symbols,
# coloured by kit and dodged by spike set
ggplot(
  Bacillus.InnOnly,
  aes(VariableKit, LogCopiespermLofMilk, shape = factor(SpikeSet))
) +
  geom_point(
    aes(colour = VariableKit),
    size = 2, stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1)
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Bacillus DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
# Manuscript figure: final Bacillus model estimates overlaid on the raw data.
# Select the plotted columns by name rather than by position so the figure
# does not depend on the column layout of Bacillus.InnOnly.
mod_df1_Bacillus.rawdata <- Bacillus.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]
# Estimates must come from the final Bacillus model (mod.Bacillus.best), the
# same model the compact-letter groups below were computed from. The previous
# code used `mod3`, which is not the Bacillus model.
mod_df2_Bacillus.best.model <- emmeans(mod.Bacillus.best, ~VariableKit, mode = "df.error") %>%
  summary() %>%
  data.frame()
ggplot() +
  geom_jitter(
    data = mod_df1_Bacillus.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet),
    size = 2, stroke = 1, width = .2
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  geom_errorbar(
    data = mod_df2_Bacillus.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Bacillus.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  # Letters are drawn 2 log units below each estimate so they clear the points
  geom_text(
    data = data.frame(mod.Bacillus.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = -2, nudge_x = -0.05, fontface = "bold"
  ) +
  # ylim(3.5, 6.5) +
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Bacillus DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  )
ggsave("Bacillus-Model-Jitter.TIFF", width = 7.5, height = 3.5, units = "in", dpi = 600)
# Manuscript figure: same overlay as the jittered version, but with the raw
# points dodged by spike set instead of randomly jittered; saved as TIFF.
ggplot() +
  geom_point(
    data = mod_df1_Bacillus.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet),
    size = 2, stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = .5)
  ) +
  geom_errorbar(
    data = mod_df2_Bacillus.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Bacillus.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Bacillus.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = -2, nudge_x = -0.05, fontface = "bold"
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  # ylim(3.5, 6.5) +
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Bacillus DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  )
ggsave("Bacillus-Model.TIFF", width = 7.5, height = 3.5, units = "in", dpi = 600)
Listeria
#Data File: CleanDNAprepData1.18.19
library(ggplot2)
library(dplyr)
library(emmeans)
library(multcompView)
# Keep only the Listeria monocytogenes assay rows of the sample data
Listeria <- filter(SampleData, Assay == "Listeria monocytogenes")
dim(Listeria)
## [1] 240 42
# Summary statistics per kit and sample type: mean and SD of the log copy
# number (ignoring NAs), number of missing values, and group size
Listeria.summary <- Listeria %>%
  group_by(VariableKit, VariableSampleType) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableKit' (override with `.groups` argument)
write.table (Listeria.summary, "Listeria.summary.txt", sep="\t" )
# Boxplots of raw Listeria log copy numbers by sample type, coloured by kit.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
# Only real aesthetics are mapped: the former `z=` and `ylab=` entries inside
# aes() are not used by geom_boxplot, and the axis label is set by ylab().
ggplot(
  data = Listeria,
  mapping = aes(x = VariableSampleType, y = LogCopiespermLofMilk, color = VariableKit)
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_boxplot(lwd = 1) +
  theme_bw() +
  ggtitle("Listeria Copy Numbers") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl")
  )
## Warning: Removed 130 rows containing non-finite values (stat_boxplot).
# Jittered raw Listeria log copy numbers by sample type:
# colour = kit, shape = spike set
ggplot(
  data = Listeria,
  mapping = aes(
    x = VariableSampleType, y = LogCopiespermLofMilk,
    color = VariableKit, shape = SpikeSet
  )
) +
  geom_jitter(width = 0.25) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl")
  ) +
  ylab("Log10 Copies / mL of Milk") +
  ggtitle("Listeria Copy Numbers") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
## Warning: Removed 130 rows containing missing values (geom_point).
# Jittered raw Listeria log copy numbers by sample type:
# colour = spike set, shape = kit
ggplot(
  data = Listeria,
  mapping = aes(
    x = VariableSampleType, y = LogCopiespermLofMilk,
    color = SpikeSet, shape = VariableKit
  )
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_jitter(width = 0.35) +
  # There are 7 kits but the default shape palette only has 6 shapes, so one
  # kit was silently dropped from the plot. Supply 7 shapes explicitly: the
  # first six are the ggplot2 defaults, plus a filled diamond for the seventh.
  scale_shape_manual(values = c(16, 17, 15, 3, 7, 8, 18)) +
  ggtitle("Listeria Copy Numbers") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl")
  )
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 150 rows containing missing values (geom_point).
# Inoculated milk data: keep inoculated-milk rows with a log copy number
# above 0.001 (rows where the value is NA are dropped by filter() as well)
Listeria.InnOnly <- filter(
  Listeria,
  VariableSampleType == "InoculatedMilk",
  LogCopiespermLofMilk > 0.001
)
# Mean, SD, missing count and group size per sample type, spike set and kit
Listeria.InnOnly %>%
  group_by(VariableSampleType, VariableSpikeSet, VariableKit) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableSampleType', 'VariableSpikeSet' (override with `.groups` argument)
## VariableSampleType VariableSpikeSet VariableKit mean_LogCopiespermLofMilk
## 1 InoculatedMilk First COREDNA 4.996165
## 2 InoculatedMilk First Mastitis 5.920458
## 3 InoculatedMilk First Pfood 5.261206
## 4 InoculatedMilk First PSoilP 4.589896
## 5 InoculatedMilk First ZymoDNA 5.217074
## 6 InoculatedMilk Second COREDNA 6.764514
## 7 InoculatedMilk Second Mastitis 6.925829
## 8 InoculatedMilk Second Pfood 6.209820
## 9 InoculatedMilk Second PSoilP 5.652350
## 10 InoculatedMilk Second PviralDNA 6.818189
## 11 InoculatedMilk Second ZymoDNA 6.299305
## 12 InoculatedMilk Third COREDNA 6.868625
## 13 InoculatedMilk Third Mastitis 6.906907
## 14 InoculatedMilk Third Pfood 6.311089
## 15 InoculatedMilk Third PSoilP 6.058299
## 16 InoculatedMilk Third PviralDNA 6.871705
## 17 InoculatedMilk Third ZymoDNA 6.483661
## st_dev n_missing n_total
## 1 0.07118535 0 6
## 2 0.03284588 0 6
## 3 0.05882789 0 6
## 4 0.10368861 0 5
## 5 0.10607334 0 6
## 6 0.06440611 0 6
## 7 0.05305144 0 6
## 8 0.05854870 0 6
## 9 0.28801610 0 4
## 10 0.10167286 0 6
## 11 0.06699653 0 6
## 12 0.03112356 0 6
## 13 0.01320150 0 6
## 14 0.02199862 0 6
## 15 0.11216514 0 6
## 16 0.08573938 0 6
## 17 0.19226732 0 6
# Three linear models were compared: SpikeSet only, qPCRefficiency only, and
# both as covariates. The best-fitting model was used as the final model.
# Candidate 1: kit effect adjusted for SpikeSet only.
m_Listeria.LogCopiespermLofMilk1 <- lm(
  LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = Listeria.InnOnly
)
summary(m_Listeria.LogCopiespermLofMilk1)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = Listeria.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4850 -0.1104 0.0259 0.1451 0.2849
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.37287 0.05033 106.749 < 2e-16 ***
## VariableKitMastitis 0.37463 0.05986 6.258 1.25e-08 ***
## VariableKitPfood -0.28240 0.05986 -4.718 8.59e-06 ***
## VariableKitPSoilP -0.75785 0.06285 -12.058 < 2e-16 ***
## VariableKitPviralDNA 0.21673 0.06830 3.173 0.002056 **
## VariableKitZymoDNA -0.20975 0.05986 -3.504 0.000713 ***
## SpikeSetSecond 1.19041 0.04651 25.595 < 2e-16 ***
## SpikeSetThird 1.32028 0.04582 28.815 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1796 on 91 degrees of freedom
## Multiple R-squared: 0.943, Adjusted R-squared: 0.9386
## F-statistic: 215.1 on 7 and 91 DF, p-value: < 2.2e-16
# Candidate 2: kit effect adjusted for qPCRefficiency only.
m_Listeria.LogCopiespermLofMilk2 <- lm(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency,
  data = Listeria.InnOnly
)
summary(m_Listeria.LogCopiespermLofMilk2)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency,
## data = Listeria.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5353 -0.1140 0.0311 0.1480 0.3386
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.95563 0.13712 72.605 < 2e-16 ***
## VariableKitMastitis 0.39940 0.06362 6.278 1.11e-08 ***
## VariableKitPfood -0.25763 0.06362 -4.050 0.000107 ***
## VariableKitPSoilP -0.73690 0.06672 -11.045 < 2e-16 ***
## VariableKitPviralDNA 0.24188 0.07241 3.341 0.001209 **
## VariableKitZymoDNA -0.18499 0.06362 -2.908 0.004561 **
## qPCRefficiency -7.31247 0.25287 -28.918 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1908 on 92 degrees of freedom
## Multiple R-squared: 0.9349, Adjusted R-squared: 0.9307
## F-statistic: 220.3 on 6 and 92 DF, p-value: < 2.2e-16
# Candidate 3: kit effect adjusted for both qPCRefficiency and SpikeSet.
m_Listeria.LogCopiespermLofMilk3 <- lm(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Listeria.InnOnly
)
summary(m_Listeria.LogCopiespermLofMilk3)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency +
## SpikeSet, data = Listeria.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.38090 -0.06193 -0.00581 0.07481 0.30014
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -38.47062 4.19088 -9.180 1.46e-14 ***
## VariableKitMastitis 0.13712 0.04637 2.957 0.00397 **
## VariableKitPfood -0.51991 0.04637 -11.212 < 2e-16 ***
## VariableKitPSoilP -0.99555 0.04815 -20.676 < 2e-16 ***
## VariableKitPviralDNA 0.02838 0.04952 0.573 0.56805
## VariableKitZymoDNA -0.44726 0.04637 -9.645 1.56e-15 ***
## qPCRefficiency 70.12757 6.70308 10.462 < 2e-16 ***
## SpikeSetSecond 12.17216 1.05015 11.591 < 2e-16 ***
## SpikeSetThird 14.09919 1.22185 11.539 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1213 on 90 degrees of freedom
## Multiple R-squared: 0.9743, Adjusted R-squared: 0.972
## F-statistic: 426.2 on 8 and 90 DF, p-value: < 2.2e-16
# Fit of model with both qPCRefficiency and SpikeSet is better than fit of model with SpikeSet only
# With several models, anova() tests them against one another in the order given.
# NOTE(review): models 1 (VariableKit + SpikeSet) and 2 (VariableKit + qPCRefficiency)
# are not nested, so the model-2 row is not a nested F-test, and the model-3 row
# is a comparison against model 2 rather than model 1. A direct test of the
# statement above would be anova(model 1, model 3) - TODO confirm intent.
anova(m_Listeria.LogCopiespermLofMilk1, m_Listeria.LogCopiespermLofMilk2, m_Listeria.LogCopiespermLofMilk3)
## Analysis of Variance Table
##
## Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
## Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency
## Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 91 2.9347
## 2 92 3.3504 -1 -0.41571 28.253 7.673e-07 ***
## 3 90 1.3242 2 2.02619 68.853 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(m_Listeria.LogCopiespermLofMilk1)
## [1] -49.38216
AIC(m_Listeria.LogCopiespermLofMilk2)
## [1] -38.26713
AIC(m_Listeria.LogCopiespermLofMilk3)
## [1] -126.1634
# Final model chosen:
# Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
# m_Listeria.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Listeria.InnOnly )
# Mixed-model alternative: SpikeSet enters as a random intercept instead of a
# fixed effect. lmerTest is loaded after lme4 so that summary() reports
# Satterthwaite t-tests.
library(lme4)
library(lmerTest)
model1 <- lmer(
  LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet),
  data = Listeria.InnOnly,
  REML = TRUE
)
summary(model1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
## Data: Listeria.InnOnly
##
## REML criterion at convergence: -26.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7104 -0.6104 0.1486 0.8050 1.5907
##
## Random effects:
## Groups Name Variance Std.Dev.
## SpikeSet (Intercept) 0.52832 0.7269
## Residual 0.03225 0.1796
## Number of obs: 99, groups: SpikeSet, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.20977 0.42178 2.03237 14.723 0.004289 **
## VariableKitMastitis 0.37463 0.05986 90.99933 6.258 1.25e-08 ***
## VariableKitPfood -0.28240 0.05986 90.99933 -4.718 8.59e-06 ***
## VariableKitPSoilP -0.75784 0.06285 90.99999 -12.058 < 2e-16 ***
## VariableKitPviralDNA 0.21761 0.06829 91.01451 3.186 0.001975 **
## VariableKitZymoDNA -0.20975 0.05986 90.99933 -3.504 0.000713 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) VrblKM VrblKP VrKPSP VKPDNA
## VrblKtMstts -0.071
## VariblKtPfd -0.071 0.500
## VarblKtPSlP -0.068 0.476 0.476
## VrblKtPvDNA -0.062 0.438 0.438 0.417
## VrblKtZyDNA -0.071 0.500 0.500 0.476 0.438
# Same random-intercept model with qPCRefficiency added as a fixed covariate.
# NOTE(review): this fit emits convergence warnings (degenerate Hessian), so its
# estimates should be read with caution - SpikeSet has only 3 levels here.
model2 <- lmer(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet),
  data = Listeria.InnOnly,
  REML = TRUE
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
summary(model2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
## Data: Listeria.InnOnly
##
## REML criterion at convergence: -94.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.14660 -0.50911 -0.05461 0.61975 2.46790
##
## Random effects:
## Groups Name Variance Std.Dev.
## SpikeSet (Intercept) 57.12448 7.5581
## Residual 0.01472 0.1213
## Number of obs: 99, groups: SpikeSet, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -29.11488 5.53685 4.90246 -5.258 0.00350 **
## VariableKitMastitis 0.14108 0.04629 90.57640 3.048 0.00302 **
## VariableKitPfood -0.51595 0.04629 90.57640 -11.146 < 2e-16 ***
## VariableKitPSoilP -0.99158 0.04807 90.53671 -20.627 < 2e-16 ***
## VariableKitPviralDNA 0.03155 0.04947 90.32411 0.638 0.52522
## VariableKitZymoDNA -0.44331 0.04629 90.57640 -9.577 2.05e-15 ***
## qPCRefficiency 68.95896 6.65292 91.81209 10.365 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) VrblKM VrblKP VrKPSP VKPDNA VKZDNA
## VrblKtMstts 0.296
## VariblKtPfd 0.296 0.618
## VarblKtPSlP 0.286 0.596 0.596
## VrblKtPvDNA 0.219 0.533 0.533 0.513
## VrblKtZyDNA 0.296 0.618 0.618 0.596 0.533
## qPCReffcncy -0.616 -0.487 -0.487 -0.469 -0.361 -0.487
## optimizer (nloptwrap) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
anova(model1, model2)
## refitting model(s) with ML (instead of REML)
## Data: Listeria.InnOnly
## Models:
## model1: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
## model2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model1 8 -30.520 -9.759 23.260 -46.52
## model2 9 -90.866 -67.510 54.433 -108.87 62.346 1 2.881e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC (model1)
## [1] -10.19267
AIC (model2)
## [1] -76.85232
# Final homoscedastic linear model (same formula and data as candidate 3),
# stored under the name used by the diagnostics and emmeans code below.
m_Listeria.LogCopiespermLofMilk <- lm(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Listeria.InnOnly
)
summary(m_Listeria.LogCopiespermLofMilk)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency +
## SpikeSet, data = Listeria.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.38090 -0.06193 -0.00581 0.07481 0.30014
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -38.47062 4.19088 -9.180 1.46e-14 ***
## VariableKitMastitis 0.13712 0.04637 2.957 0.00397 **
## VariableKitPfood -0.51991 0.04637 -11.212 < 2e-16 ***
## VariableKitPSoilP -0.99555 0.04815 -20.676 < 2e-16 ***
## VariableKitPviralDNA 0.02838 0.04952 0.573 0.56805
## VariableKitZymoDNA -0.44726 0.04637 -9.645 1.56e-15 ***
## qPCRefficiency 70.12757 6.70308 10.462 < 2e-16 ***
## SpikeSetSecond 12.17216 1.05015 11.591 < 2e-16 ***
## SpikeSetThird 14.09919 1.22185 11.539 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1213 on 90 degrees of freedom
## Multiple R-squared: 0.9743, Adjusted R-squared: 0.972
## F-statistic: 426.2 on 8 and 90 DF, p-value: < 2.2e-16
# Residuals vs. predicted values for the final linear model (base graphics)
plot(
  x = predict(m_Listeria.LogCopiespermLofMilk),
  y = resid(m_Listeria.LogCopiespermLofMilk)
)
# using ggplot2
# Same diagnostic, coloured by kit; horizontal reference lines mark residuals of +/- 1.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
ggplot(
  m_Listeria.LogCopiespermLofMilk,
  aes(
    x = predict(m_Listeria.LogCopiespermLofMilk),
    y = resid(m_Listeria.LogCopiespermLofMilk),
    color = VariableKit
  )
) +
  geom_point() +
  theme_bw() +
  ggtitle("Listeria Innoculated Only - Model Fit - Residuals vs Predicted") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  geom_hline(yintercept = c(1, -1))
# qqplots
# Normal Q-Q plot of the residuals with its reference line
qqnorm(resid(m_Listeria.LogCopiespermLofMilk))
qqline(resid(m_Listeria.LogCopiespermLofMilk))
summary(m_Listeria.LogCopiespermLofMilk)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency +
## SpikeSet, data = Listeria.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.38090 -0.06193 -0.00581 0.07481 0.30014
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -38.47062 4.19088 -9.180 1.46e-14 ***
## VariableKitMastitis 0.13712 0.04637 2.957 0.00397 **
## VariableKitPfood -0.51991 0.04637 -11.212 < 2e-16 ***
## VariableKitPSoilP -0.99555 0.04815 -20.676 < 2e-16 ***
## VariableKitPviralDNA 0.02838 0.04952 0.573 0.56805
## VariableKitZymoDNA -0.44726 0.04637 -9.645 1.56e-15 ***
## qPCRefficiency 70.12757 6.70308 10.462 < 2e-16 ***
## SpikeSetSecond 12.17216 1.05015 11.591 < 2e-16 ***
## SpikeSetThird 14.09919 1.22185 11.539 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1213 on 90 degrees of freedom
## Multiple R-squared: 0.9743, Adjusted R-squared: 0.972
## F-statistic: 426.2 on 8 and 90 DF, p-value: < 2.2e-16
# No large residuals were identified
# Store the final-model residuals as a new column, then count, per kit,
# the observations whose absolute residual exceeds 1.
Listeria.InnOnly$resid <- resid(m_Listeria.LogCopiespermLofMilk)
Listeria.InnOnly %>%
  filter(abs(resid) > 1) %>%
  select(VariableKit, resid) %>%
  group_by(VariableKit) %>%
  summarize(n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 0 x 2
## # … with 2 variables: VariableKit <chr>, n <int>
# Tukey-adjusted pairwise comparison of the kit estimates
m_Listeria.LogCopiespermLofMilk_emmeans <- emmeans(
  m_Listeria.LogCopiespermLofMilk,
  pairwise ~ VariableKit
)
# Compact letter display: kits sharing a letter are not significantly different.
# NOTE(review): emmeans reports CLD() as deprecated; the suggested replacement
# (multcomp::cld) needs the multcomp package, which this script does not load.
m_Listeria.LogCopiespermLofMilk_cld <- CLD(
  m_Listeria.LogCopiespermLofMilk_emmeans$emmeans,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
m_Listeria.LogCopiespermLofMilk_cld
## VariableKit emmean SE df lower.CL upper.CL .group
## PSoilP 4.90 0.0610 90 4.78 5.03 A
## Pfood 5.38 0.0596 90 5.26 5.50 B
## ZymoDNA 5.45 0.0596 90 5.33 5.57 B
## COREDNA 5.90 0.0412 90 5.82 5.98 C
## PviralDNA 5.93 0.0598 90 5.81 6.05 CD
## Mastitis 6.04 0.0596 90 5.92 6.16 D
##
## Results are averaged over the levels of: SpikeSet
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 6 estimates
## significance level used: alpha = 0.05
# Estimated marginal means per kit as a plain data.frame, for plotting with
# other software
data.frame(
  summary(
    emmeans(m_Listeria.LogCopiespermLofMilk, ~ VariableKit)
  )
)
## VariableKit emmean SE df lower.CL upper.CL
## 1 COREDNA 5.899998 0.04115991 90 5.818226 5.981769
## 2 Mastitis 6.037118 0.05961461 90 5.918684 6.155553
## 3 Pfood 5.380092 0.05961461 90 5.261658 5.498527
## 4 PSoilP 4.904452 0.06101657 90 4.783232 5.025673
## 5 PviralDNA 5.928375 0.05981400 90 5.809544 6.047206
## 6 ZymoDNA 5.452734 0.05961461 90 5.334299 5.571169
# Plot the estimated marginal means per kit with their confidence limits and
# the compact-letter-display groups from the linear model
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
emmeans(m_Listeria.LogCopiespermLofMilk, ~VariableKit) %>%
  summary() %>%
  data.frame() %>%
  ggplot(aes(x = VariableKit, y = emmean, color = VariableKit)) +
  geom_point() +
  labs(y = "Estimated Marginal Means") +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.5) +
  # group letters are drawn just to the right of each point
  geom_text(
    data = data.frame(m_Listeria.LogCopiespermLofMilk_cld),
    aes(x = VariableKit, label = `.group`),
    hjust = -.1
  ) +
  theme_bw() +
  ggtitle("Listeria Copy Numbers - Inoculated Milk Only") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  )
## Listeria - Models not assuming homoscedasticity
# from https://cran.r-project.org/web/packages/emmeans/vignettes/FAQs.html#contents
library(nlme)
# Final model chosen:
# Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
# m_Listeria.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Listeria.InnOnly )
# Same fixed effects as Model 3, but with a separate residual standard
# deviation per kit (varIdent).
# NOTE(review): gls() is called without `method`, so it uses its default
# estimation method; AIC values from this fit may not be directly comparable
# with AIC() of the lm fit if the two likelihoods differ (REML vs ML) - TODO confirm.
mod.Listeria <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Listeria.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod.Listeria)
## Generalized least squares fit by REML
## Model: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
## Data: Listeria.InnOnly
## AIC BIC logLik
## -136.564 -99.06683 83.28199
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | VariableKit
## Parameter estimates:
## COREDNA Mastitis Pfood PSoilP PviralDNA ZymoDNA
## 1.0000000 0.8738631 0.8509705 4.3116394 1.5284977 2.6389409
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) -44.37198 2.109774 -21.031628 0.000
## VariableKitMastitis 0.10487 0.021750 4.821762 0.000
## VariableKitPfood -0.55215 0.021573 -25.594078 0.000
## VariableKitPSoilP -1.02340 0.068235 -14.998130 0.000
## VariableKitPviralDNA 0.02838 0.031166 0.910509 0.365
## VariableKitZymoDNA -0.47951 0.040946 -11.710872 0.000
## qPCRefficiency 79.64858 3.376327 23.590305 0.000
## SpikeSetSecond 13.61953 0.527318 25.827934 0.000
## SpikeSetThird 15.72431 0.613679 25.623005 0.000
##
## Correlation:
## (Intr) VrblKM VrblKP VrKPSP VKPDNA VKZDNA qPCRff SpkStS
## VariableKitMastitis 0.521
## VariableKitPfood 0.526 0.692
## VariableKitPSoilP 0.166 0.219 0.221
## VariableKitPviralDNA 0.270 0.429 0.433 0.137
## VariableKitZymoDNA 0.277 0.365 0.368 0.116 0.228
## qPCRefficiency -1.000 -0.526 -0.530 -0.168 -0.272 -0.279
## SpikeSetSecond -1.000 -0.525 -0.530 -0.167 -0.277 -0.279 0.999
## SpikeSetThird -1.000 -0.526 -0.530 -0.168 -0.276 -0.279 1.000 1.000
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.88195463 -0.73035842 -0.04161918 0.79892625 1.90855601
##
## Residual standard error: 0.05910791
## Degrees of freedom: 99 total; 90 residual
AIC(m_Listeria.LogCopiespermLofMilk3)
## [1] -126.1634
AIC(mod.Listeria)
## [1] -136.564
# Testing simpler model
# Kit effect only (no qPCRefficiency or SpikeSet), still with a per-kit
# residual standard deviation.
mod3.Listeria <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit,
  data = Listeria.InnOnly,
  weights = varIdent(form = ~ 1 | VariableKit)
)
summary(mod3.Listeria)
## Generalized least squares fit by REML
## Model: LogCopiespermLofMilk ~ VariableKit
## Data: Listeria.InnOnly
## AIC BIC logLik
## 170.6132 201.0044 -73.30658
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | VariableKit
## Parameter estimates:
## COREDNA Mastitis Pfood PSoilP PviralDNA ZymoDNA
## 1.0000000 0.5468388 0.5518105 0.7654328 0.1060306 0.6640781
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 6.209768 0.2087850 29.742411 0.0000
## VariableKitMastitis 0.374630 0.2379629 1.574320 0.1188
## VariableKitPfood -0.282396 0.2384627 -1.184237 0.2393
## VariableKitPSoilP -0.749190 0.2724676 -2.749648 0.0072
## VariableKitPviralDNA 0.635179 0.2105381 3.016932 0.0033
## VariableKitZymoDNA -0.209755 0.2506289 -0.836913 0.4048
##
## Correlation:
## (Intr) VrblKM VrblKP VrKPSP VKPDNA
## VariableKitMastitis -0.877
## VariableKitPfood -0.876 0.768
## VariableKitPSoilP -0.766 0.672 0.671
## VariableKitPviralDNA -0.992 0.870 0.868 0.760
## VariableKitZymoDNA -0.833 0.731 0.729 0.638 0.826
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.9601839 -1.2519516 0.5402811 0.7307451 1.5677673
##
## Residual standard error: 0.8857996
## Degrees of freedom: 99 total; 93 residual
AIC(m_Listeria.LogCopiespermLofMilk3)
## [1] -126.1634
AIC(mod.Listeria)
## [1] -136.564
AIC(mod3.Listeria) #mod.Listeria is best model (including qPCRefficiency and SpikeSet)
## [1] 170.6132
# Keep the heteroscedastic gls fit under the name used by the code below
mod.Listeria.best <- mod.Listeria
#mod.Listeria not assuming homoscedasticity and including qPCRefficiency and SpikeSet is a much better fit than any of the alternatives
library(emmeans)
# Tukey-adjusted pairwise comparison of kit estimates, using the model's
# error degrees of freedom
mod.Listeria.best_emmeans <- emmeans(
  mod.Listeria.best,
  pairwise ~ VariableKit,
  mode = "df.error"
)
# Compact letter display, sorted and with the pairwise details attached
mod.Listeria.best_cld <- CLD(
  mod.Listeria.best_emmeans$emmeans,
  sort = TRUE,
  details = TRUE,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Listeria.best_cld_letters <- CLD(mod.Listeria.best_emmeans$emmeans, Letters=LETTERS)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Listeria.best_cld_letters
## VariableKit emmean SE df lower.CL upper.CL .group
## PSoilP 4.83 0.0709 90 4.69 4.98 A
## Pfood 5.31 0.0289 90 5.25 5.36 B
## ZymoDNA 5.38 0.0452 90 5.29 5.47 B
## COREDNA 5.86 0.0204 90 5.82 5.90 C
## PviralDNA 5.89 0.0354 90 5.82 5.96 CD
## Mastitis 5.96 0.0290 90 5.91 6.02 D
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: df.error
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 6 estimates
## significance level used: alpha = 0.05
# Estimated marginal means per kit from the gls model as a plain data.frame,
# for plotting with other software
data.frame(
  summary(
    emmeans(mod.Listeria.best, ~ VariableKit, mode = "df.error")
  )
)
## VariableKit emmean SE df lower.CL upper.CL
## 1 COREDNA 5.857941 0.02040897 90 5.817395 5.898487
## 2 Mastitis 5.962816 0.02902571 90 5.905151 6.020480
## 3 Pfood 5.305790 0.02889339 90 5.248388 5.363192
## 4 PSoilP 4.834540 0.07089067 90 4.693704 4.975377
## 5 PviralDNA 5.886318 0.03539537 90 5.815999 5.956637
## 6 ZymoDNA 5.378431 0.04523235 90 5.288569 5.468293
# Summary of the kit estimates including confidence limits and t-tests
emmeans(mod.Listeria.best, ~ VariableKit, mode = "df.error") %>%
  summary(infer = TRUE)
## VariableKit emmean SE df lower.CL upper.CL t.ratio p.value
## COREDNA 5.86 0.0204 90 5.82 5.90 287.028 <.0001
## Mastitis 5.96 0.0290 90 5.91 6.02 205.432 <.0001
## Pfood 5.31 0.0289 90 5.25 5.36 183.633 <.0001
## PSoilP 4.83 0.0709 90 4.69 4.98 68.197 <.0001
## PviralDNA 5.89 0.0354 90 5.82 5.96 166.302 <.0001
## ZymoDNA 5.38 0.0452 90 5.29 5.47 118.907 <.0001
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: df.error
## Confidence level used: 0.95
# Plot overlaying model estimates to raw data
# The raw-data subset is selected by column name rather than by position:
# positional indices silently point at different columns whenever a column is
# added to or reordered in Listeria.InnOnly (a `resid` column is appended to
# it earlier in this script). These are the three columns the plots below use.
mod_df1_Listeria.rawdata <- Listeria.InnOnly[c("LogCopiespermLofMilk", "SpikeSet", "VariableKit")]
# Estimated marginal means per kit from the heteroscedastic gls model
mod_df2_Listeria.best.model <- emmeans(mod.Listeria.best, ~VariableKit, mode = "df.error") %>%
  summary() %>%
  data.frame()
# Quick look: jittered raw values with model estimates and confidence limits on top
ggplot() +
  geom_jitter(data = mod_df1_Listeria.rawdata, aes(x = VariableKit, y = LogCopiespermLofMilk)) +
  geom_point(data = mod_df2_Listeria.best.model, aes(x = VariableKit, y = emmean, fill = VariableKit)) +
  geom_errorbar(data = mod_df2_Listeria.best.model, aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL), width = 0.5)
# Making the plot pretty
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
# The colour scale is added exactly once; a second scale_color_manual() only
# replaces the first and triggers the "Scale for 'colour' is already present" message.
ggplot() +
  geom_jitter(
    data = mod_df1_Listeria.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
  ) +
  geom_errorbar(
    data = mod_df2_Listeria.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(data = mod_df2_Listeria.best.model, aes(x = VariableKit, y = emmean, fill = VariableKit)) +
  # compact-letter-display groups, nudged slightly above each estimate
  geom_text(
    data = data.frame(mod.Listeria.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 0.2, nudge_x = -0.05, fontface = "bold"
  ) +
  #ylim(3.5, 6.5)+
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  theme_bw() +
  ggtitle("Listeria Copy Numbers - Inoculated Milk Only - Not assuming homoscedasticity ") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
Model not assuming homoscedasticity and including VariableKit + SpikeSet + qPCRefficiency was chosen.
qPCRefficiency is forced into all final models
Formula: mod.Listeria = nlme::gls(LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Listeria.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod.Listeria)
-136.564 # best model
Previously chosen Linear Model that assumed homoscedasticity for reference:
m_Listeria.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Listeria.InnOnly )
AIC(m_Listeria.LogCopiespermLofMilk3)
-126.1634
Manuscript Figures: Listeria
# Listeria: Milk Data and Controls
# All sample types except NP40InoculatedMilk, one facet per sample type
Listeria.Inn.Ctrl <- Listeria %>%
  filter(VariableSampleType != "NP40InoculatedMilk")
dim(Listeria.Inn.Ctrl)
# NOTE(review): "Pfood" is "#D2338B" here but "#D2326B" in the other Colors
# definitions in this script - confirm which value is intended.
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2338B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
# Fix the facet order of the sample types
Listeria.Inn.Ctrl$VariableSampleType <- factor(
  Listeria.Inn.Ctrl$VariableSampleType,
  levels = c("InoculatedMilk", "UninoculatedMilk", "NoTemplateControl", "MockCommunity")
)
ggplot(
  data = Listeria.Inn.Ctrl,
  aes(VariableKit, LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
) +
  scale_shape_discrete(solid = FALSE) +
  ylab("Listeria Log10 Copies / mL of Milk") +
  xlab("Kit") +
  # points are dodged by group without random jitter; legend suppressed
  geom_point(
    aes(colour = VariableKit),
    size = 2, stroke = .5,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1),
    show.legend = FALSE
  ) +
  facet_wrap(vars(VariableSampleType), nrow = 1) +
  ggtitle("Listeria DNA Copy Numbers - All Samples and Controls") +
  theme_bw() +
  ylim(0, 9) +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  ) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
## Warning: Removed 130 rows containing missing values (geom_point).
ggsave("Listeria-AllSamples.TIFF", width = 9, height = 3,units = "in", dpi = 600)
## Warning: Removed 130 rows containing missing values (geom_point).
# Inoculated milk only: raw log copy numbers per kit, open shapes by spike set,
# dodged by group without random jitter
ggplot(
  Listeria.InnOnly,
  aes(VariableKit, LogCopiespermLofMilk, shape = factor(SpikeSet))
) +
  scale_shape_discrete(solid = FALSE) +
  geom_point(
    aes(colour = VariableKit),
    size = 2, stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 1)
  ) +
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Listeria DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
# Plot overlaying model estimates to raw data
# Columns are selected by name instead of by position so the subset keeps
# pointing at the plotted variables even if Listeria.InnOnly gains or reorders
# columns (a `resid` column is appended to it earlier in this script).
mod_df1_Listeria.rawdata <- Listeria.InnOnly[c("LogCopiespermLofMilk", "SpikeSet", "VariableKit")]
# Estimated marginal means per kit from the heteroscedastic gls model
mod_df2_Listeria.best.model <- emmeans(mod.Listeria.best, ~VariableKit, mode = "df.error") %>%
  summary() %>%
  data.frame()
ggplot() +
  geom_jitter(
    data = mod_df1_Listeria.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet),
    size = 2, stroke = 1, width = .2
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  geom_errorbar(
    data = mod_df2_Listeria.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(data = mod_df2_Listeria.best.model, aes(x = VariableKit, y = emmean, fill = VariableKit)) +
  # compact-letter-display groups, nudged above each estimate
  geom_text(
    data = data.frame(mod.Listeria.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = .3, nudge_x = -0.05, fontface = "bold"
  ) +
  #ylim(3.5, 6.5)+
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Listeria DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
# Saves the plot drawn just above
ggsave("Listeria-Model-Jitter.TIFF", width = 7.5, height = 3.5, units = "in", dpi = 600)
# Same overlay as the jittered figure, but raw points are dodged by group
# (no random jitter) so the figure is reproducible
ggplot() +
  geom_point(
    data = mod_df1_Listeria.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet),
    size = 2, stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = .5)
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  geom_errorbar(
    data = mod_df2_Listeria.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(data = mod_df2_Listeria.best.model, aes(x = VariableKit, y = emmean, fill = VariableKit)) +
  # compact-letter-display groups, nudged above each estimate
  geom_text(
    data = data.frame(mod.Listeria.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = .3, nudge_x = -0.05, fontface = "bold"
  ) +
  #ylim(3.5, 6.5)+
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Listeria DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
# Saves the plot drawn just above
ggsave("Listeria-Model.TIFF", width = 7.5, height = 3.5, units = "in", dpi = 600)
Mycobacterium
#Data File: CleanDNAprepData1.18.19
library(ggplot2)
library(dplyr)
library(emmeans)
library(multcompView)
# Subset of the sample data for the Mycobacterium smegmatis assay
Mycobacterium <- SampleData %>%
  filter(Assay == "Mycobacterium smegmatis")
dim(Mycobacterium)
## [1] 240 42
# Summary statistics per kit and sample type: mean, SD, number of missing
# values and number of rows
Mycobacterium.summary <- Mycobacterium %>%
  group_by(VariableKit, VariableSampleType) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableKit' (override with `.groups` argument)
write.table (Mycobacterium.summary, "Mycobacterium.summary.txt", sep="\t" )
# Box plots of the raw log copy numbers per sample type, one box per kit
Colors <- c(
  "COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00",
  "Pfood" = "#D2326B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4",
  "ZymoDNA" = "#165F05"
)
# Only real aesthetics are mapped: the former `ylab = "Copy Numbers"` and
# `z = VariableKit` entries inside aes() were not used by geom_boxplot(), and
# the axis title is set by ylab() below.
ggplot(
  data = Mycobacterium,
  mapping = aes(x = VariableSampleType, y = LogCopiespermLofMilk, color = VariableKit)
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_boxplot(lwd = 1) +
  theme_bw() +
  ggtitle("Mycobacterium Copy Numbers") +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl"))
## Warning: Removed 101 rows containing non-finite values (stat_boxplot).
# Jittered raw log copy numbers per sample type; colour = kit, shape = spike set
ggplot(
  data = Mycobacterium,
  mapping = aes(
    x = VariableSampleType,
    y = LogCopiespermLofMilk,
    color = VariableKit,
    shape = SpikeSet
  )
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_jitter(width = 0.25) +
  ggtitle("Mycobacterium Copy Numbers") +
  theme_bw() +
  scale_color_manual(values = Colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl"))
## Warning: Removed 101 rows containing missing values (geom_point).
# Jittered raw log copy numbers per sample type; colour = spike set, shape = kit
ggplot(
  data = Mycobacterium,
  mapping = aes(
    x = VariableSampleType,
    y = LogCopiespermLofMilk,
    color = SpikeSet,
    shape = VariableKit
  )
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_jitter(width = 0.35) +
  # There are seven kits but the default shape palette stops at six, which
  # leaves one kit without a shape and drops its points from the plot.
  # Shapes are therefore assigned explicitly, one per kit.
  scale_shape_manual(
    values = c(
      "COREDNA" = 16, "EZFood" = 17, "Mastitis" = 15, "Pfood" = 3,
      "PSoilP" = 7, "PviralDNA" = 8, "ZymoDNA" = 18
    )
  ) +
  ggtitle("Mycobacterium Copy Numbers") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 123 rows containing missing values (geom_point).
# Inoculated Milk Data
# Keep only inoculated-milk rows whose log copy number exceeds 0.001
# (rows with a missing value fail the comparison and are dropped as well).
Mycobacterium.InnOnly <- Mycobacterium %>%
  filter(VariableSampleType == "InoculatedMilk" & LogCopiespermLofMilk > 0.001)
# Mean, SD and counts per sample type / spike set / kit, printed as a plain data.frame
Mycobacterium.InnOnly %>%
  group_by(VariableSampleType, VariableSpikeSet, VariableKit) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableSampleType', 'VariableSpikeSet' (override with `.groups` argument)
## VariableSampleType VariableSpikeSet VariableKit mean_LogCopiespermLofMilk
## 1 InoculatedMilk First COREDNA 6.410321
## 2 InoculatedMilk First EZFood 3.387637
## 3 InoculatedMilk First Mastitis 6.132651
## 4 InoculatedMilk First Pfood 5.835527
## 5 InoculatedMilk First PSoilP 4.337075
## 6 InoculatedMilk First PviralDNA 5.791533
## 7 InoculatedMilk First ZymoDNA 5.517488
## 8 InoculatedMilk Second COREDNA 7.101812
## 9 InoculatedMilk Second Mastitis 7.079584
## 10 InoculatedMilk Second Pfood 6.579458
## 11 InoculatedMilk Second PSoilP 6.072664
## 12 InoculatedMilk Second PviralDNA 6.680003
## 13 InoculatedMilk Second ZymoDNA 6.701950
## 14 InoculatedMilk Third COREDNA 6.917894
## 15 InoculatedMilk Third EZFood 6.308042
## 16 InoculatedMilk Third Mastitis 6.875300
## 17 InoculatedMilk Third Pfood 6.465286
## 18 InoculatedMilk Third PSoilP 5.904056
## 19 InoculatedMilk Third PviralDNA 6.266574
## 20 InoculatedMilk Third ZymoDNA 6.695051
## st_dev n_missing n_total
## 1 0.03379999 0 6
## 2 0.53911986 0 4
## 3 0.03721685 0 6
## 4 0.02840872 0 6
## 5 0.18166318 0 6
## 6 0.06735908 0 6
## 7 0.08926898 0 6
## 8 0.07093623 0 6
## 9 0.02215197 0 6
## 10 0.04582864 0 6
## 11 0.17450050 0 6
## 12 0.03305596 0 6
## 13 0.04422551 0 6
## 14 0.08192923 0 6
## 15 NA 0 1
## 16 0.03405437 0 6
## 17 0.02810963 0 6
## 18 0.13596609 0 6
## 19 0.08559028 0 6
## 20 0.19000457 0 6
# Covariate selection: three linear models are compared (SpikeSet only,
# qPCRefficiency only, and both as covariates). The best-fitting model is used
# as the final model.
# Candidate 1: kit effect adjusted for SpikeSet only.
m_Mycobacterium.LogCopiespermLofMilk1 <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = Mycobacterium.InnOnly
)
summary(m_Mycobacterium.LogCopiespermLofMilk1)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = Mycobacterium.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.92497 -0.13287 -0.02344 0.15372 1.58590
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.13871 0.07752 79.185 < 2e-16 ***
## VariableKitEZFood -2.35460 0.14721 -15.995 < 2e-16 ***
## VariableKitMastitis -0.11416 0.09533 -1.198 0.234
## VariableKitPfood -0.51659 0.09533 -5.419 3.90e-07 ***
## VariableKitPSoilP -1.37208 0.09533 -14.393 < 2e-16 ***
## VariableKitPviralDNA -0.56397 0.09533 -5.916 4.26e-08 ***
## VariableKitZymoDNA -0.50518 0.09533 -5.299 6.55e-07 ***
## SpikeSetSecond 1.07587 0.06705 16.046 < 2e-16 ***
## SpikeSetThird 0.93803 0.06596 14.222 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.286 on 104 degrees of freedom
## Multiple R-squared: 0.8946, Adjusted R-squared: 0.8865
## F-statistic: 110.3 on 8 and 104 DF, p-value: < 2.2e-16
# Candidate 2: kit effect adjusted for qPCRefficiency only.
m_Mycobacterium.LogCopiespermLofMilk2 <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency,
  data = Mycobacterium.InnOnly
)
summary(m_Mycobacterium.LogCopiespermLofMilk2)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency,
## data = Mycobacterium.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.86642 -0.22173 0.00509 0.19529 1.35168
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.5221 0.1298 65.677 < 2e-16 ***
## VariableKitEZFood -1.9474 0.1653 -11.782 < 2e-16 ***
## VariableKitMastitis 0.2337 0.1048 2.229 0.0279 *
## VariableKitPfood -0.1687 0.1048 -1.609 0.1106
## VariableKitPSoilP -1.0242 0.1048 -9.769 < 2e-16 ***
## VariableKitPviralDNA -0.5640 0.1025 -5.500 2.69e-07 ***
## VariableKitZymoDNA -0.1573 0.1048 -1.500 0.1365
## qPCRefficiency -3.1148 0.1958 -15.910 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3076 on 105 degrees of freedom
## Multiple R-squared: 0.8769, Adjusted R-squared: 0.8686
## F-statistic: 106.8 on 7 and 105 DF, p-value: < 2.2e-16
# Candidate 3: kit effect adjusted for both qPCRefficiency and SpikeSet.
m_Mycobacterium.LogCopiespermLofMilk3 <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Mycobacterium.InnOnly
)
summary(m_Mycobacterium.LogCopiespermLofMilk3)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency +
## SpikeSet, data = Mycobacterium.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.89045 -0.11670 -0.02229 0.10545 1.44781
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.31862 0.25291 28.938 < 2e-16 ***
## VariableKitEZFood -2.09249 0.14393 -14.538 < 2e-16 ***
## VariableKitMastitis 0.06611 0.09404 0.703 0.483636
## VariableKitPfood -0.33631 0.09404 -3.576 0.000533 ***
## VariableKitPSoilP -1.19180 0.09404 -12.673 < 2e-16 ***
## VariableKitPviralDNA -0.56397 0.08641 -6.527 2.57e-09 ***
## VariableKitZymoDNA -0.32490 0.09404 -3.455 0.000801 ***
## qPCRefficiency -1.61421 0.33237 -4.857 4.28e-06 ***
## SpikeSetSecond 0.66315 0.10447 6.348 5.97e-09 ***
## SpikeSetThird 0.47280 0.11292 4.187 5.97e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2592 on 103 degrees of freedom
## Multiple R-squared: 0.9142, Adjusted R-squared: 0.9067
## F-statistic: 122 on 9 and 103 DF, p-value: < 2.2e-16
# Fit of model with both qPCRefficiency and SpikeSet is better than fit of model with SpikeSet only
# anova() compares the models sequentially in the order given (1 vs 2, then 2 vs 3).
# NOTE(review): models 1 and 2 are not nested (each contains a covariate the
# other lacks), so the model 1 vs model 2 row (Df = -1) is not a valid
# nested-model F test. Only the model 2 vs model 3 step compares nested models;
# the "SpikeSet only" vs "both" conclusion rests on the AIC values instead.
anova(m_Mycobacterium.LogCopiespermLofMilk1, m_Mycobacterium.LogCopiespermLofMilk2, m_Mycobacterium.LogCopiespermLofMilk3)
## Analysis of Variance Table
##
## Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
## Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency
## Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 104 8.5058
## 2 105 9.9352 -1 -1.4294 21.273 1.149e-05 ***
## 3 103 6.9209 2 3.0143 22.430 8.199e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# AIC of each candidate linear model (lower is better). All three are lm fits
# to the same data, so the values are directly comparable; model 3 is lowest.
AIC(m_Mycobacterium.LogCopiespermLofMilk1)
## [1] 48.38973
AIC(m_Mycobacterium.LogCopiespermLofMilk2)
## [1] 63.94316
AIC(m_Mycobacterium.LogCopiespermLofMilk3)
## [1] 27.08939
# Final model chosen:
# Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
#m_Mycobacterium.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Mycobacterium.InnOnly )
# lmerTest is attached after lme4 so that lmer() fits report Satterthwaite
# degrees of freedom and p-values in summary().
library(lme4)
library(lmerTest)
# Mixed-model alternative, for reference: SpikeSet as a random intercept
# instead of a fixed covariate.
# NOTE(review): SpikeSet has only 3 levels, so its variance component is
# estimated from very few groups -- interpret with caution.
model1 <- lmer(LogCopiespermLofMilk ~ VariableKit + (1|SpikeSet),
               data=Mycobacterium.InnOnly,
               REML=TRUE)
summary(model1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
## Data: Mycobacterium.InnOnly
##
## REML criterion at convergence: 64.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2385 -0.4552 -0.0726 0.5291 5.5621
##
## Random effects:
## Groups Name Variance Std.Dev.
## SpikeSet (Intercept) 0.34054 0.5836
## Residual 0.08179 0.2860
## Number of obs: 113, groups: SpikeSet, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.81001 0.34359 2.13547 19.820 0.00185 **
## VariableKitEZFood -2.35772 0.14719 104.04783 -16.018 < 2e-16 ***
## VariableKitMastitis -0.11416 0.09533 104.00016 -1.198 0.23380
## VariableKitPfood -0.51659 0.09533 104.00016 -5.419 3.90e-07 ***
## VariableKitPSoilP -1.37208 0.09533 104.00016 -14.393 < 2e-16 ***
## VariableKitPviralDNA -0.56397 0.09533 104.00016 -5.916 4.26e-08 ***
## VariableKitZymoDNA -0.50518 0.09533 104.00016 -5.299 6.55e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VarblKtEZFd -0.090
## VrblKtMstts -0.139 0.324
## VariblKtPfd -0.139 0.324 0.500
## VarblKtPSlP -0.139 0.324 0.500 0.500
## VrblKtPvDNA -0.139 0.324 0.500 0.500 0.500
## VrblKtZyDNA -0.139 0.324 0.500 0.500 0.500 0.500
# Same random-intercept model with qPCRefficiency added as a fixed covariate.
model2 <- lmer(LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1|SpikeSet),
               data=Mycobacterium.InnOnly,
               REML=TRUE)
summary(model2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
## Data: Mycobacterium.InnOnly
##
## REML criterion at convergence: 41.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4323 -0.4457 -0.1126 0.4201 5.5752
##
## Random effects:
## Groups Name Variance Std.Dev.
## SpikeSet (Intercept) 0.10805 0.3287
## Residual 0.06722 0.2593
## Number of obs: 113, groups: SpikeSet, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 7.74985 0.26748 6.39677 28.973 4.99e-08 ***
## VariableKitEZFood -2.08149 0.14368 103.65629 -14.487 < 2e-16 ***
## VariableKitMastitis 0.07680 0.09371 104.20386 0.820 0.414355
## VariableKitPfood -0.32563 0.09371 104.20386 -3.475 0.000746 ***
## VariableKitPSoilP -1.18112 0.09371 104.20386 -12.604 < 2e-16 ***
## VariableKitPviralDNA -0.56397 0.08642 102.92851 -6.526 2.59e-09 ***
## VariableKitZymoDNA -0.31422 0.09371 104.20386 -3.353 0.001114 **
## qPCRefficiency -1.70987 0.32440 100.04919 -5.271 7.84e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA
## VarblKtEZFd 0.150
## VrblKtMstts 0.109 0.421
## VariblKtPfd 0.109 0.421 0.575
## VarblKtPSlP 0.109 0.421 0.575 0.575
## VrblKtPvDNA -0.162 0.301 0.461 0.461 0.461
## VrblKtZyDNA 0.109 0.421 0.575 0.575 0.575 0.461
## qPCReffcncy -0.667 -0.371 -0.387 -0.387 -0.387 0.000 -0.387
# Likelihood-ratio comparison of the two mixed models. They differ in their
# fixed effects, and anova() refits both with ML before comparing (see the
# "refitting model(s) with ML" message in the output).
anova(model1, model2)
## refitting model(s) with ML (instead of REML)
## Data: Mycobacterium.InnOnly
## Models:
## model1: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
## model2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model1 9 63.564 88.11 -22.782 45.564
## model2 10 39.466 66.74 -9.733 19.466 26.098 1 3.246e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# AIC of the mixed models as fitted (REML = TRUE).
# NOTE(review): these values come from REML fits with different fixed effects,
# so they are not a sound basis for comparing model1 and model2 -- prefer the
# ML-based AIC column printed by anova(model1, model2).
AIC (model1)
## [1] 82.45669
AIC (model2)
## [1] 61.19095
# Final linear model (same specification as candidate 3), stored under the
# name used by the diagnostics and emmeans steps.
m_Mycobacterium.LogCopiespermLofMilk <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Mycobacterium.InnOnly
)
summary(m_Mycobacterium.LogCopiespermLofMilk)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency +
## SpikeSet, data = Mycobacterium.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.89045 -0.11670 -0.02229 0.10545 1.44781
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.31862 0.25291 28.938 < 2e-16 ***
## VariableKitEZFood -2.09249 0.14393 -14.538 < 2e-16 ***
## VariableKitMastitis 0.06611 0.09404 0.703 0.483636
## VariableKitPfood -0.33631 0.09404 -3.576 0.000533 ***
## VariableKitPSoilP -1.19180 0.09404 -12.673 < 2e-16 ***
## VariableKitPviralDNA -0.56397 0.08641 -6.527 2.57e-09 ***
## VariableKitZymoDNA -0.32490 0.09404 -3.455 0.000801 ***
## qPCRefficiency -1.61421 0.33237 -4.857 4.28e-06 ***
## SpikeSetSecond 0.66315 0.10447 6.348 5.97e-09 ***
## SpikeSetThird 0.47280 0.11292 4.187 5.97e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2592 on 103 degrees of freedom
## Multiple R-squared: 0.9142, Adjusted R-squared: 0.9067
## F-statistic: 122 on 9 and 103 DF, p-value: < 2.2e-16
# Residual diagnostics for the final linear model.
# Base graphics: residuals against fitted values.
plot(x=predict(m_Mycobacterium.LogCopiespermLofMilk),y=resid(m_Mycobacterium.LogCopiespermLofMilk))
# using ggplot2
# Same plot coloured by kit. The horizontal reference lines at +1 and -1 mark
# the |residual| > 1 cut-off used to flag large residuals.
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2326B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")
ggplot(m_Mycobacterium.LogCopiespermLofMilk, aes(x=predict(m_Mycobacterium.LogCopiespermLofMilk), y=resid(m_Mycobacterium.LogCopiespermLofMilk), color=VariableKit)) +
geom_point()+
theme_bw()+
ggtitle("Mycobacterium Inoculated Only - Model Fit - Residuals vs Predicted")+
scale_color_manual(values=Colors)+
theme(panel.grid.major = element_blank())+
theme(panel.grid.minor = element_blank())+
theme(panel.background = element_blank())+
theme(panel.border = element_blank())+
theme(axis.line = element_line())+
geom_hline(yintercept = 1) +
geom_hline(yintercept = -1)
# qqplots
# Normal Q-Q plot of the residuals with its reference line.
qqnorm(resid(m_Mycobacterium.LogCopiespermLofMilk))
qqline(resid(m_Mycobacterium.LogCopiespermLofMilk))
summary(m_Mycobacterium.LogCopiespermLofMilk)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency +
## SpikeSet, data = Mycobacterium.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.89045 -0.11670 -0.02229 0.10545 1.44781
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.31862 0.25291 28.938 < 2e-16 ***
## VariableKitEZFood -2.09249 0.14393 -14.538 < 2e-16 ***
## VariableKitMastitis 0.06611 0.09404 0.703 0.483636
## VariableKitPfood -0.33631 0.09404 -3.576 0.000533 ***
## VariableKitPSoilP -1.19180 0.09404 -12.673 < 2e-16 ***
## VariableKitPviralDNA -0.56397 0.08641 -6.527 2.57e-09 ***
## VariableKitZymoDNA -0.32490 0.09404 -3.455 0.000801 ***
## qPCRefficiency -1.61421 0.33237 -4.857 4.28e-06 ***
## SpikeSetSecond 0.66315 0.10447 6.348 5.97e-09 ***
## SpikeSetThird 0.47280 0.11292 4.187 5.97e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2592 on 103 degrees of freedom
## Multiple R-squared: 0.9142, Adjusted R-squared: 0.9067
## F-statistic: 122 on 9 and 103 DF, p-value: < 2.2e-16
# Only 1 large residual was identified, and it belonged to EZfood
# Attach the model residuals to the data (adds a `resid` column), then count
# the observations with |residual| > 1 per kit.
Mycobacterium.InnOnly[["resid"]] <- residuals(m_Mycobacterium.LogCopiespermLofMilk)
Mycobacterium.InnOnly %>%
  filter(abs(resid) > 1) %>%
  group_by(VariableKit) %>%
  summarize(n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 1 x 2
## VariableKit n
## <chr> <int>
## 1 EZFood 1
# Tukey-adjusted pairwise comparisons of the kit estimated marginal means
m_Mycobacterium.LogCopiespermLofMilk_emmeans <- emmeans(
  m_Mycobacterium.LogCopiespermLofMilk,
  specs = pairwise ~ VariableKit
)
# Compact letter display of those comparisons (upper-case letters as labels).
# NOTE(review): CLD() is deprecated in emmeans (see the warning it prints);
# migrating to multcomp::cld() would require the multcomp package.
m_Mycobacterium.LogCopiespermLofMilk_cld <- CLD(
  m_Mycobacterium.LogCopiespermLofMilk_emmeans$emmeans,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
m_Mycobacterium.LogCopiespermLofMilk_cld
## VariableKit emmean SE df lower.CL upper.CL .group
## EZFood 4.58 0.1215 103 4.34 4.82 A
## PSoilP 5.48 0.0618 103 5.36 5.61 B
## PviralDNA 6.11 0.0671 103 5.98 6.24 C
## Pfood 6.34 0.0618 103 6.22 6.46 C
## ZymoDNA 6.35 0.0618 103 6.23 6.47 C
## COREDNA 6.67 0.0671 103 6.54 6.81 D
## Mastitis 6.74 0.0618 103 6.62 6.86 D
##
## Results are averaged over the levels of: SpikeSet
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 7 estimates
## significance level used: alpha = 0.05
# Estimated marginal means per kit as a plain data frame, for plotting with
# other software
data.frame(summary(emmeans(m_Mycobacterium.LogCopiespermLofMilk, ~ VariableKit)))
## VariableKit emmean SE df lower.CL upper.CL
## 1 COREDNA 6.674713 0.06714940 103 6.541538 6.807888
## 2 EZFood 4.582219 0.12146273 103 4.341326 4.823111
## 3 Mastitis 6.740826 0.06179603 103 6.618268 6.863383
## 4 Pfood 6.338404 0.06179603 103 6.215846 6.460962
## 5 PSoilP 5.482912 0.06179603 103 5.360354 5.605470
## 6 PviralDNA 6.110741 0.06714940 103 5.977566 6.243916
## 7 ZymoDNA 6.349810 0.06179603 103 6.227252 6.472368
# Plot the estimated marginal means of the linear model with their 95% CIs,
# labelled with the compact-letter-display groups.
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2326B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")
data.frame(summary(emmeans(m_Mycobacterium.LogCopiespermLofMilk, ~VariableKit))) %>%
  ggplot(aes(x = VariableKit, y = emmean, color = VariableKit)) +
  geom_point() +
  labs(y = "Estimated Marginal Means") +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.5) +
  geom_text(
    data = data.frame(m_Mycobacterium.LogCopiespermLofMilk_cld),
    aes(x = VariableKit, label = `.group`),
    hjust = -.1
  ) +
  theme_bw() +
  ggtitle("Mycobacterium Copy Numbers - Inoculated Milk Only") +
  scale_color_manual(values = Colors) +
  # Strip grid, background and border; keep only the axis lines.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  ) +
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
# from https://cran.r-project.org/web/packages/emmeans/vignettes/FAQs.html#contents
library(nlme)
# Final model chosen:
# Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
# m_Mycobacterium.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Mycobacterium.InnOnly )
# Same fixed effects as Model 3, refitted by generalized least squares with a
# separate residual standard deviation per kit (varIdent), i.e. without
# assuming homoscedasticity across kits.
mod.Mycobacterium <- nlme::gls(LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Mycobacterium.InnOnly,
                               weights = varIdent(form = ~1 | VariableKit))
summary(mod.Mycobacterium)
## Generalized least squares fit by REML
## Model: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
## Data: Mycobacterium.InnOnly
## AIC BIC logLik
## -80.071 -35.28061 57.0355
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | VariableKit
## Parameter estimates:
## COREDNA EZFood Mastitis Pfood PSoilP PviralDNA ZymoDNA
## 1.0000000 16.7764658 0.4895085 1.3746125 6.5788124 1.7605999 3.3967428
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 6.774266 0.0734689 92.20592 0.0000
## VariableKitEZFood -2.354573 0.4844962 -4.85984 0.0000
## VariableKitMastitis -0.041469 0.0199514 -2.07852 0.0401
## VariableKitPfood -0.443891 0.0279206 -15.89836 0.0000
## VariableKitPSoilP -1.299383 0.1017353 -12.77220 0.0000
## VariableKitPviralDNA -0.563972 0.0307886 -18.31757 0.0000
## VariableKitZymoDNA -0.432485 0.0548675 -7.88234 0.0000
## qPCRefficiency -0.650915 0.0945204 -6.88650 0.0000
## SpikeSetSecond 0.700651 0.0314932 22.24770 0.0000
## SpikeSetThird 0.479920 0.0340255 14.10469 0.0000
##
## Correlation:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA qPCRff
## VariableKitEZFood 0.020
## VariableKitMastitis 0.356 0.039
## VariableKitPfood 0.254 0.028 0.615
## VariableKitPSoilP 0.070 0.008 0.169 0.121
## VariableKitPviralDNA -0.102 0.016 0.376 0.269 0.074
## VariableKitZymoDNA 0.129 0.014 0.313 0.224 0.061 0.137
## qPCRefficiency -0.971 -0.028 -0.529 -0.378 -0.104 0.000 -0.192
## SpikeSetSecond -0.902 -0.019 -0.465 -0.332 -0.091 0.000 -0.169 0.879
## SpikeSetThird -0.916 -0.021 -0.475 -0.339 -0.093 0.000 -0.173 0.897
## SpkStS
## VariableKitEZFood
## VariableKitMastitis
## VariableKitPfood
## VariableKitPSoilP
## VariableKitPviralDNA
## VariableKitZymoDNA
## qPCRefficiency
## SpikeSetSecond
## SpikeSetThird 0.894
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.06631319 -0.60733948 -0.01670647 0.70681273 2.59918745
##
## Residual standard error: 0.06451336
## Degrees of freedom: 113 total; 103 residual
# AIC of the homoscedastic linear model vs the heteroscedastic gls model.
# NOTE(review): the lm AIC is based on the maximum-likelihood log-likelihood,
# whereas the gls model was fitted by REML (see its summary), so the two values
# are on different likelihood scales. A like-for-like check would compare two
# gls fits (with and without the varIdent weights) under the same method.
AIC(m_Mycobacterium.LogCopiespermLofMilk3)
## [1] 27.08939
AIC(mod.Mycobacterium)
## [1] -80.071
# Testing simpler model
# Kit effect only (no qPCRefficiency or SpikeSet), still with a separate
# residual standard deviation per kit.
mod3.Mycobacterium <- nlme::gls(LogCopiespermLofMilk ~ VariableKit, data=Mycobacterium.InnOnly,
                                weights = varIdent(form = ~1 | VariableKit))
summary(mod3.Mycobacterium)
## Generalized least squares fit by REML
## Model: LogCopiespermLofMilk ~ VariableKit
## Data: Mycobacterium.InnOnly
## AIC BIC logLik
## 185.8061 223.0942 -78.90304
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | VariableKit
## Parameter estimates:
## COREDNA EZFood Mastitis Pfood PSoilP PviralDNA ZymoDNA
## 1.000000 4.515682 1.366589 1.101159 2.666382 1.232636 1.903249
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 6.810009 0.0723945 94.06805 0.0000
## VariableKitEZFood -2.838291 0.6244796 -4.54505 0.0000
## VariableKitMastitis -0.114164 0.1225920 -0.93125 0.3538
## VariableKitPfood -0.516586 0.1076842 -4.79722 0.0000
## VariableKitPSoilP -1.372077 0.2061603 -6.65539 0.0000
## VariableKitPviralDNA -0.563972 0.1149088 -4.90800 0.0000
## VariableKitZymoDNA -0.505179 0.1556457 -3.24570 0.0016
##
## Correlation:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VariableKitEZFood -0.116
## VariableKitMastitis -0.591 0.068
## VariableKitPfood -0.672 0.078 0.397
## VariableKitPSoilP -0.351 0.041 0.207 0.236
## VariableKitPviralDNA -0.630 0.073 0.372 0.424 0.221
## VariableKitZymoDNA -0.465 0.054 0.275 0.313 0.163 0.293
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.6471390 -1.2065620 0.4419297 0.7611800 1.6844881
##
## Residual standard error: 0.3071438
## Degrees of freedom: 113 total; 106 residual
# AIC of the three candidate final models (lower is better).
# NOTE(review): both gls models were fitted by REML and have different fixed
# effects, and the lm AIC is ML-based, so these values are not strictly
# comparable; refitting the gls models with method = "ML" would give a sound
# comparison of the fixed-effect structures.
AIC(m_Mycobacterium.LogCopiespermLofMilk3)
## [1] 27.08939
AIC(mod.Mycobacterium)
## [1] -80.071
AIC(mod3.Mycobacterium) #mod.Mycobacterium is best model (including qPCRefficiency and SpikeSet)
## [1] 185.8061
#mod.Mycobacterium not assuming homoscedasticity and including qPCRefficiency and SpikeSet is a much better fit than any of the alternatives
# Keep the selected model under a dedicated name for the emmeans steps and figures.
mod.Mycobacterium.best <- mod.Mycobacterium
# Tukey-adjusted pairwise comparisons of the kit estimated marginal means from
# the gls model (emmeans reports Satterthwaite degrees of freedom here).
mod.Mycobacterium_emmeans <- emmeans(
  mod.Mycobacterium,
  specs = pairwise ~ VariableKit
)
# Compact letter display, sorted and with the full table of pairwise contrasts
mod.Mycobacterium_cld <- CLD(
  mod.Mycobacterium_emmeans$emmeans,
  sort = TRUE,
  details = TRUE,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
# Letters-only version, used to label the plots
mod.Mycobacterium_cld_letters <- CLD(
  mod.Mycobacterium_emmeans$emmeans,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Mycobacterium_cld
## $emmeans
## VariableKit emmean SE df lower.CL upper.CL .group
## EZFood 4.401 0.484098 4.00 3.057 5.745 ABCD
## PSoilP 5.456 0.100072 16.83 5.245 5.667 A
## PviralDNA 6.191 0.027919 17.20 6.133 6.250 B
## Pfood 6.312 0.021068 13.53 6.266 6.357 C
## ZymoDNA 6.323 0.051718 16.57 6.214 6.432 BC
## Mastitis 6.714 0.007896 17.89 6.697 6.731 D
## COREDNA 6.755 0.017146 19.41 6.720 6.791 D
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: satterthwaite
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 7 estimates
## significance level used: alpha = 0.05
##
## $comparisons
## contrast estimate SE df t.ratio p.value
## PSoilP - EZFood 1.0552 0.4943 4.35 2.135 0.4591
## PviralDNA - EZFood 1.7906 0.4850 4.03 3.692 0.1274
## PviralDNA - PSoilP 0.7354 0.1041 19.57 7.065 <.0001
## Pfood - EZFood 1.9107 0.4845 4.01 3.943 0.1052
## Pfood - PSoilP 0.8555 0.1022 18.34 8.371 <.0001
## Pfood - PviralDNA 0.1201 0.0356 33.31 3.376 0.0281
## ZymoDNA - EZFood 1.9221 0.4868 4.09 3.948 0.1025
## ZymoDNA - PSoilP 0.8669 0.1126 24.93 7.700 <.0001
## ZymoDNA - PviralDNA 0.1315 0.0591 26.27 2.224 0.3172
## ZymoDNA - Pfood 0.0114 0.0557 22.02 0.205 1.0000
## Mastitis - EZFood 2.3131 0.4841 4.00 4.778 0.0576
## Mastitis - PSoilP 1.2579 0.1003 16.99 12.540 <.0001
## Mastitis - PviralDNA 0.5225 0.0297 21.22 17.578 <.0001
## Mastitis - Pfood 0.4024 0.0222 16.77 18.137 <.0001
## Mastitis - ZymoDNA 0.3910 0.0522 17.15 7.493 <.0001
## COREDNA - EZFood 2.3546 0.4845 4.01 4.860 0.0541
## COREDNA - PSoilP 1.2994 0.1017 17.95 12.772 <.0001
## COREDNA - PviralDNA 0.5640 0.0308 24.37 18.318 <.0001
## COREDNA - Pfood 0.4439 0.0279 30.10 15.898 <.0001
## COREDNA - ZymoDNA 0.4325 0.0549 20.68 7.882 <.0001
## COREDNA - Mastitis 0.0415 0.0200 26.89 2.079 0.3928
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: satterthwaite
## P value adjustment: tukey method for comparing a family of 7 estimates
# Estimated marginal means per kit from the gls model as a plain data frame,
# for plotting with other software
data.frame(summary(emmeans(mod.Mycobacterium, ~ VariableKit)))
## VariableKit emmean SE df lower.CL upper.CL
## 1 COREDNA 6.755452 0.017145930 19.411487 6.719617 6.791288
## 2 EZFood 4.400879 0.484097497 4.000816 3.056917 5.744841
## 3 Mastitis 6.713983 0.007895688 17.887142 6.697387 6.730579
## 4 Pfood 6.311561 0.021067570 13.528588 6.266228 6.356895
## 5 PSoilP 5.456069 0.100071724 16.827130 5.244771 5.667368
## 6 PviralDNA 6.191480 0.027919167 17.197002 6.132627 6.250333
## 7 ZymoDNA 6.322968 0.051717793 16.569065 6.213636 6.432299
# Summary with confidence intervals and tests (infer = TRUE) for each kit mean
emmeans(mod.Mycobacterium, ~ VariableKit) %>%
  summary(infer = TRUE)
## VariableKit emmean SE df lower.CL upper.CL t.ratio p.value
## COREDNA 6.755 0.017146 19.41 6.720 6.791 393.997 <.0001
## EZFood 4.401 0.484098 4.00 3.057 5.745 9.091 0.0008
## Mastitis 6.714 0.007896 17.89 6.697 6.731 850.336 <.0001
## Pfood 6.312 0.021068 13.53 6.266 6.357 299.587 <.0001
## PSoilP 5.456 0.100072 16.83 5.245 5.667 54.522 <.0001
## PviralDNA 6.191 0.027919 17.20 6.133 6.250 221.764 <.0001
## ZymoDNA 6.323 0.051718 16.57 6.214 6.432 122.259 <.0001
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: satterthwaite
## Confidence level used: 0.95
# Plot overlaying model estimates to raw data
# Raw observations: only the columns the plots use, selected by name so the
# subset does not depend on the column order of the input file.
mod.Mycobacterium_df1_Mycobacterium.rawdata<-Mycobacterium.InnOnly[c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")]
# Model estimates: estimated marginal mean and confidence limits per kit.
mod.Mycobacterium_df2_Mycobacterium.model<-emmeans(mod.Mycobacterium,~VariableKit) %>%
summary() %>%
data.frame()
# Quick overlay: jittered raw data, model means and their confidence intervals.
ggplot() +
geom_jitter(data=mod.Mycobacterium_df1_Mycobacterium.rawdata,aes(x=VariableKit,y=LogCopiespermLofMilk)) +
geom_point(data=mod.Mycobacterium_df2_Mycobacterium.model,aes(x=VariableKit,y=emmean,fill=VariableKit))+
geom_errorbar(data=mod.Mycobacterium_df2_Mycobacterium.model,aes(x=VariableKit,ymin=lower.CL,ymax=upper.CL),width=0.5)
# Making the plot pretty
# Raw data coloured by kit and shaped by spike set, overlaid with the gls model
# means, their confidence intervals and the compact-letter-display groups.
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2326B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")
ggplot() +
geom_jitter(data=mod.Mycobacterium_df1_Mycobacterium.rawdata,aes(x=VariableKit,y=LogCopiespermLofMilk, color=VariableKit, shape=SpikeSet)) +
geom_errorbar(data=mod.Mycobacterium_df2_Mycobacterium.model,aes(x=VariableKit,ymin=lower.CL,ymax=upper.CL),width=0.5)+
geom_point(data=mod.Mycobacterium_df2_Mycobacterium.model,aes(x=VariableKit,y=emmean,fill=VariableKit))+
geom_text(data =data.frame(mod.Mycobacterium_cld_letters),aes(x=VariableKit,label=`.group`, y=emmean), nudge_y = 0.2, nudge_x = -0.05, fontface = "bold")+
ylim(2.5, 8.0)+
xlab("Kit")+
ylab ("Log10 Copies / mL of Milk")+
theme_bw()+
ggtitle("Mycobacterium Copy Numbers - Inoculated Milk Only - Not assuming homoscedasticity ")+
# A single colour scale: adding scale_color_manual() twice only makes ggplot2
# replace the first one and emit a "Scale for 'colour' is already present" message.
scale_color_manual(values=Colors)+
theme(panel.grid.major = element_blank())+
theme(panel.grid.minor = element_blank())+
theme(panel.background = element_blank())+
theme(panel.border = element_blank())+
theme(axis.line = element_line())+
theme(axis.text.x=element_text(angle=25,vjust=0.5))+
scale_x_discrete(limits=c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
library(emmeans)
# Tukey-adjusted pairwise comparisons of the kit estimates for the selected
# model. mode = "df.error" makes emmeans use one error degrees-of-freedom value
# for every estimate instead of per-estimate Satterthwaite df.
mod.Mycobacterium.best_emmeans <- emmeans(
  mod.Mycobacterium.best,
  specs = pairwise ~ VariableKit,
  mode = "df.error"
)
# Compact letter display, sorted and with the full table of pairwise contrasts
mod.Mycobacterium.best_cld <- CLD(
  mod.Mycobacterium.best_emmeans$emmeans,
  sort = TRUE,
  details = TRUE,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
# Letters-only version, used to label the figures
mod.Mycobacterium.best_cld_letters <- CLD(
  mod.Mycobacterium.best_emmeans$emmeans,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Mycobacterium.best_cld_letters
## VariableKit emmean SE df lower.CL upper.CL .group
## EZFood 4.401 0.484098 96 3.440 5.362 A
## PSoilP 5.456 0.100072 96 5.257 5.655 A
## PviralDNA 6.191 0.027919 96 6.136 6.247 B
## Pfood 6.312 0.021068 96 6.270 6.353 C
## ZymoDNA 6.323 0.051718 96 6.220 6.426 BC
## Mastitis 6.714 0.007896 96 6.698 6.730 D
## COREDNA 6.755 0.017146 96 6.721 6.789 D
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: df.error
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 7 estimates
## significance level used: alpha = 0.05
# Estimated marginal means per kit from the selected model (df.error mode) as
# a plain data frame, for plotting with other software
data.frame(summary(emmeans(mod.Mycobacterium.best, ~ VariableKit, mode = "df.error")))
## VariableKit emmean SE df lower.CL upper.CL
## 1 COREDNA 6.755452 0.017145930 96 6.721418 6.789487
## 2 EZFood 4.400879 0.484097497 96 3.439953 5.361805
## 3 Mastitis 6.713983 0.007895688 96 6.698310 6.729656
## 4 Pfood 6.311561 0.021067570 96 6.269743 6.353380
## 5 PSoilP 5.456069 0.100071724 96 5.257429 5.654710
## 6 PviralDNA 6.191480 0.027919167 96 6.136061 6.246899
## 7 ZymoDNA 6.322968 0.051717793 96 6.220309 6.425627
# Summary with confidence intervals and tests (infer = TRUE) for each kit mean
emmeans(mod.Mycobacterium.best, ~ VariableKit, mode = "df.error") %>%
  summary(infer = TRUE)
## VariableKit emmean SE df lower.CL upper.CL t.ratio p.value
## COREDNA 6.755 0.017146 96 6.721 6.789 393.997 <.0001
## EZFood 4.401 0.484098 96 3.440 5.362 9.091 <.0001
## Mastitis 6.714 0.007896 96 6.698 6.730 850.336 <.0001
## Pfood 6.312 0.021068 96 6.270 6.353 299.587 <.0001
## PSoilP 5.456 0.100072 96 5.257 5.655 54.522 <.0001
## PviralDNA 6.191 0.027919 96 6.136 6.247 221.764 <.0001
## ZymoDNA 6.323 0.051718 96 6.220 6.426 122.259 <.0001
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: df.error
## Confidence level used: 0.95
# Plot overlaying model estimates to raw data
# Raw observations: only the columns the plots use, selected by name so the
# subset does not depend on the column order of the input file.
mod_df1_Mycobacterium.rawdata<-Mycobacterium.InnOnly[c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")]
# Model estimates (df.error mode): marginal mean and confidence limits per kit.
mod_df2_Mycobacterium.best.model<-emmeans(mod.Mycobacterium.best,~VariableKit, mode = "df.error") %>%
summary() %>%
data.frame()
# Quick overlay: jittered raw data, model means and their confidence intervals.
ggplot() +
geom_jitter(data=mod_df1_Mycobacterium.rawdata,aes(x=VariableKit,y=LogCopiespermLofMilk)) +
geom_point(data=mod_df2_Mycobacterium.best.model,aes(x=VariableKit,y=emmean,fill=VariableKit))+
geom_errorbar(data=mod_df2_Mycobacterium.best.model,aes(x=VariableKit,ymin=lower.CL,ymax=upper.CL),width=0.5)
# Making the plot pretty
# Raw data coloured by kit and shaped by spike set, overlaid with the selected
# model's means, confidence intervals and compact-letter-display groups.
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2326B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")
ggplot() +
geom_jitter(data=mod_df1_Mycobacterium.rawdata,aes(x=VariableKit,y=LogCopiespermLofMilk,color=VariableKit, shape=SpikeSet)) +
geom_errorbar(data=mod_df2_Mycobacterium.best.model,aes(x=VariableKit,ymin=lower.CL,ymax=upper.CL),width=0.3)+
geom_point(data=mod_df2_Mycobacterium.best.model,aes(x=VariableKit,y=emmean,fill=VariableKit))+
geom_text(data =data.frame(mod.Mycobacterium.best_cld_letters),aes(x=VariableKit,label=`.group`, y=emmean), nudge_y = 0.2, nudge_x = -0.05, fontface = "bold") +
#ylim(3.5, 6.5)+
xlab("Kit")+
ylab ("Log10 Copies / mL of Milk")+
theme_bw()+
ggtitle("Mycobacterium Copy Numbers - Inoculated Milk Only - Not assuming homoscedasticity ")+
# A single colour scale: adding scale_color_manual() twice only makes ggplot2
# replace the first one and emit a "Scale for 'colour' is already present" message.
scale_color_manual(values=Colors)+
theme(panel.grid.major = element_blank())+
theme(panel.grid.minor = element_blank())+
theme(panel.background = element_blank())+
theme(panel.border = element_blank())+
theme(axis.line = element_line())+
theme(axis.text.x=element_text(angle=25,vjust=0.5))+
scale_x_discrete(limits=c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
The model that does not assume homoscedasticity and includes VariableKit + SpikeSet + qPCRefficiency was chosen.
qPCRefficiency is forced into all final models.
Formula: mod.Mycobacterium = nlme::gls(LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Mycobacterium.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod.Mycobacterium) # best model
-80.071
Other Models for Reference:
mod3.Mycobacterium = nlme::gls(LogCopiespermLofMilk ~ VariableKit, data=Mycobacterium.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod3.Mycobacterium)
185.8061
Manuscript Figures: Mycobacterium
# Mycobacterium: Milk Data and Controls
# Data for the all-samples figure: every Mycobacterium row except the
# NP40-inoculated milk samples.
Mycobacterium.Inn.Ctrl <- filter(Mycobacterium, VariableSampleType != "NP40InoculatedMilk")
dim(Mycobacterium.Inn.Ctrl)
## [1] 240 42
# Kit colour palette for the manuscript figures.
# NOTE(review): "Pfood" is "#D2338B" here but "#D2326B" in the palettes used
# for the model plots earlier in this section -- confirm which shade is intended.
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2338B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")
# Fix the order of the sample types so the facets appear in this sequence.
Mycobacterium.Inn.Ctrl$VariableSampleType <- factor(Mycobacterium.Inn.Ctrl$VariableSampleType, levels=c('InoculatedMilk', 'UninoculatedMilk', 'NoTemplateControl', 'MockCommunity'))
# One panel per sample type; points coloured by kit, shaped by spike set
# (hollow shapes) and dodged so the spike sets sit side by side.
ggplot(data=Mycobacterium.Inn.Ctrl, aes(VariableKit,LogCopiespermLofMilk, color= VariableKit, shape=SpikeSet))+
scale_shape_discrete(solid=FALSE) +
ylab ("Mycobacterium Log10 Copies / mL of Milk")+
xlab ("Kit")+
geom_point(aes(colour = VariableKit), size = 2, stroke = .5, position=position_jitterdodge(jitter.width=0, dodge.width = 1), show.legend = FALSE) +
facet_wrap(vars(VariableSampleType),nrow = 1)+
ggtitle("Mycobacterium DNA Copy Numbers - All Samples and Controls")+
theme_bw()+
ylim(0, 9)+
scale_color_manual(values=Colors)+
theme(panel.grid.major = element_blank())+
theme(panel.grid.minor = element_blank())+
theme(panel.background = element_blank())+
theme(panel.border = element_blank())+
theme(axis.line = element_line())+
theme(axis.text.x=element_text(angle=90,vjust=0.5))+
scale_x_discrete(limits=c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
## Warning: Removed 101 rows containing missing values (geom_point).
# ggsave() is called without a plot argument, so it writes the most recently
# displayed plot (the figure built just above).
ggsave("Mycobacterium-AllSamples.TIFF", width = 9, height = 3,units = "in", dpi = 600)
## Warning: Removed 101 rows containing missing values (geom_point).
# Inoculated milk only: raw copy numbers per kit, coloured by kit, with hollow
# point shapes per spike set dodged side by side.
ggplot(Mycobacterium.InnOnly, aes(VariableKit,LogCopiespermLofMilk,shape = factor(SpikeSet))) +
scale_shape_discrete(solid=FALSE) +
geom_point(aes(colour = VariableKit), size = 2, stroke = 1, position=position_jitterdodge(jitter.width=0, dodge.width = 1)) +
ylab ("Log10 Copies / mL of Milk")+
xlab ("Kit")+
ggtitle("Mycobacterium DNA Copy Numbers - Inoculated Milk Only")+
theme_bw()+
scale_color_manual(values=Colors)+
theme(panel.grid.major = element_blank())+
theme(panel.grid.minor = element_blank())+
theme(panel.background = element_blank())+
theme(panel.border = element_blank())+
theme(axis.line = element_line())+
theme(axis.text.x=element_text(angle=25,vjust=0.5))+
scale_x_discrete(limits=c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
# Plot overlaying model estimates to raw data
# Raw observations: only the columns the plots use, selected by name so the
# subset does not depend on the column order of the input file.
mod_df1_Mycobacterium.rawdata<- Mycobacterium.InnOnly[c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")]
# Model estimates (df.error mode): marginal mean and confidence limits per kit.
mod_df2_Mycobacterium.best.model<-emmeans(mod.Mycobacterium.best,~VariableKit, mode = "df.error") %>%
summary() %>%
data.frame()
# Jittered raw data (hollow shapes per spike set) with the model means,
# confidence intervals and compact-letter-display labels on top.
ggplot() +
geom_jitter(data=mod_df1_Mycobacterium.rawdata,aes(x=VariableKit,y=LogCopiespermLofMilk,color=VariableKit, shape=SpikeSet), size = 2,stroke = 1, width = .2 ) +
scale_shape_discrete(solid=FALSE) +
scale_color_manual(values=Colors)+
geom_errorbar(data=mod_df2_Mycobacterium.best.model,aes(x=VariableKit,ymin=lower.CL,ymax=upper.CL),width=0.3)+
geom_point(data=mod_df2_Mycobacterium.best.model,aes(x=VariableKit,y=emmean,fill=VariableKit))+
geom_text(data =data.frame(mod.Mycobacterium.best_cld_letters),aes(x=VariableKit,label=`.group`, y=emmean), nudge_y = 1.2, nudge_x = -0.05, fontface = "bold") +
#ylim(3.5, 6.5)+
ylab ("Log10 Copies / mL of Milk")+
xlab ("Kit")+
ggtitle("Mycobacterium DNA Copy Numbers - Inoculated Milk Only")+
theme_bw()+
theme(panel.grid.major = element_blank())+
theme(panel.grid.minor = element_blank())+
theme(panel.background = element_blank())+
theme(panel.border = element_blank())+
theme(axis.line = element_line())+
scale_x_discrete(limits=c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
# ggsave() is called without a plot argument, so it writes the most recently
# displayed plot.
ggsave("Mycobacterium-Model-Jitter.TIFF", width = 7.5, height = 3.5 , units = "in", dpi = 600)
# Same overlay as the jittered version, but the raw points are dodged by
# SpikeSet (jitter width 0) instead of jittered; the result is saved as a TIFF.
ggplot() +
  geom_point(
    data = mod_df1_Mycobacterium.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet),
    size = 2,
    stroke = 1,
    position = position_jitterdodge(jitter.width = 0, dodge.width = 0.5)
  ) +
  geom_errorbar(
    data = mod_df2_Mycobacterium.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Mycobacterium.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Mycobacterium.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 1.2,
    nudge_x = -0.05,
    fontface = "bold"
  ) +
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  # ylim(3.5, 6.5) +
  labs(
    title = "Mycobacterium DNA Copy Numbers - Inoculated Milk Only",
    x = "Kit",
    y = "Log10 Copies / mL of Milk"
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  )
ggsave("Mycobacterium-Model.TIFF", width = 7.5, height = 3.5, units = "in", dpi = 600)
Salmonella
#Data File: CleanDNAprepData1.18.19
library(ggplot2)
library(dplyr)
library(emmeans)
library(multcompView)
# Keep only the rows of SampleData that belong to the Salmonella assay
Salmonella <- filter(SampleData, Assay == "Salmonella sp.")
dim(Salmonella)
## [1] 240 42
# Summary Statistics
# Per kit and sample type: mean and standard deviation of the log copy number
# (missing values ignored), the count of missing values and the group size.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
Salmonella.summary <- Salmonella %>%
  group_by(VariableKit, VariableSampleType) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableKit' (override with `.groups` argument)
# Export the summary as a tab-separated text file
write.table(Salmonella.summary, "Salmonella.summary.txt", sep = "\t")
# Plot raw data as boxplots per sample type, coloured by kit
# Named palette: one fixed colour per extraction kit
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2326B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")
# The former `z = VariableKit` and `ylab = "Copy Numbers"` entries in aes() were
# dropped: geom_boxplot() has no such aesthetics, the y-axis label is set by
# ylab() below, and grouping by kit already comes from the colour mapping.
ggplot(
  data = Salmonella,
  mapping = aes(x = VariableSampleType, y = LogCopiespermLofMilk, color = VariableKit)
) +
  ylab("Log10 Copies / mL of Milk") +
  geom_boxplot(lwd = 1) +
  theme_bw() +
  ggtitle("Salmonella Copy Numbers") +
  scale_color_manual(values = Colors) +
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line()) +
  theme(axis.text.x = element_text(angle = 25, vjust = 0.5)) +
  # Fixed left-to-right order of the sample types on the x axis
  scale_x_discrete(limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl"))
## Warning: Removed 100 rows containing non-finite values (stat_boxplot).
# Jittered raw Salmonella copy numbers per sample type:
# colour encodes the kit, point shape the SpikeSet.
ggplot(
  data = Salmonella,
  mapping = aes(
    x = VariableSampleType,
    y = LogCopiespermLofMilk,
    color = VariableKit,
    shape = SpikeSet
  )
) +
  geom_jitter(width = 0.25) +
  scale_color_manual(values = Colors) +
  labs(title = "Salmonella Copy Numbers", y = "Log10 Copies / mL of Milk") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
## Warning: Removed 100 rows containing missing values (geom_point).
# Jittered raw Salmonella copy numbers per sample type, coloured by SpikeSet,
# with the sample types in a fixed order along the x axis.
ggplot(
  data = Salmonella,
  mapping = aes(x = VariableSampleType, y = LogCopiespermLofMilk, color = SpikeSet)
) +
  geom_jitter(width = 0.35) +
  scale_x_discrete(
    limits = c("UninoculatedMilk", "InoculatedMilk", "NP40InoculatedMilk", "MockCommunity", "NoTemplateControl")
  ) +
  labs(title = "Salmonella Copy Numbers", y = "Log10 Copies / mL of Milk") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
## Warning: Removed 100 rows containing missing values (geom_point).
# Inoculated Milk Data
# Keep inoculated-milk rows with a log copy number above 0.001. filter() only
# keeps rows where the condition evaluates to TRUE, so rows whose
# LogCopiespermLofMilk is NA are dropped here as well.
Salmonella.InnOnly <- Salmonella %>%
  filter(VariableSampleType == "InoculatedMilk", LogCopiespermLofMilk > 0.001)
# Mean, SD, missing count and group size per sample type, spike set and kit.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
Salmonella.InnOnly %>%
  group_by(VariableSampleType, VariableSpikeSet, VariableKit) %>%
  summarize(
    mean_LogCopiespermLofMilk = mean(LogCopiespermLofMilk, na.rm = TRUE),
    st_dev = sd(LogCopiespermLofMilk, na.rm = TRUE),
    n_missing = sum(is.na(LogCopiespermLofMilk)),
    n_total = n()
  ) %>%
  data.frame()
## `summarise()` regrouping output by 'VariableSampleType', 'VariableSpikeSet' (override with `.groups` argument)
## VariableSampleType VariableSpikeSet VariableKit mean_LogCopiespermLofMilk
## 1 InoculatedMilk First COREDNA 6.021926
## 2 InoculatedMilk First EZFood 3.553886
## 3 InoculatedMilk First Mastitis 5.055847
## 4 InoculatedMilk First Pfood 4.427881
## 5 InoculatedMilk First PSoilP 3.313556
## 6 InoculatedMilk First PviralDNA 5.637821
## 7 InoculatedMilk First ZymoDNA 4.600919
## 8 InoculatedMilk Second COREDNA 5.819654
## 9 InoculatedMilk Second Mastitis 5.855133
## 10 InoculatedMilk Second Pfood 5.161440
## 11 InoculatedMilk Second PSoilP 4.265153
## 12 InoculatedMilk Second PviralDNA 5.648986
## 13 InoculatedMilk Second ZymoDNA 5.412725
## 14 InoculatedMilk Third COREDNA 6.381353
## 15 InoculatedMilk Third EZFood 5.491051
## 16 InoculatedMilk Third Mastitis 6.491253
## 17 InoculatedMilk Third Pfood 6.136920
## 18 InoculatedMilk Third PSoilP 5.157145
## 19 InoculatedMilk Third PviralDNA 6.017032
## 20 InoculatedMilk Third ZymoDNA 6.011617
## st_dev n_missing n_total
## 1 0.03689540 0 6
## 2 0.92148645 0 6
## 3 0.05638093 0 6
## 4 0.02807177 0 6
## 5 0.16097627 0 6
## 6 0.04113447 0 6
## 7 0.09323729 0 6
## 8 0.08889109 0 6
## 9 0.01505745 0 6
## 10 0.07302418 0 6
## 11 0.24970082 0 6
## 12 0.06073656 0 6
## 13 0.02695026 0 6
## 14 0.15232282 0 6
## 15 NA 0 1
## 16 0.03059223 0 6
## 17 0.04606326 0 6
## 18 0.08449168 0 4
## 19 0.10019394 0 6
## 20 0.20713085 0 6
# Three linear models are compared: SpikeSet only, qPCRefficiency only, and
# both as covariates. The best-fitting one is used as the final model.
# Candidate 1: kit + SpikeSet
m_Salmonella.LogCopiespermLofMilk1 <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet,
  data = Salmonella.InnOnly
)
summary(m_Salmonella.LogCopiespermLofMilk1)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + SpikeSet, data = Salmonella.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.83872 -0.21061 -0.01782 0.19011 0.62629
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.49384 0.09486 57.913 < 2e-16 ***
## VariableKitEZFood -1.83557 0.16017 -11.460 < 2e-16 ***
## VariableKitMastitis -0.27357 0.11654 -2.347 0.02080 *
## VariableKitPfood -0.83223 0.11654 -7.141 1.30e-10 ***
## VariableKitPSoilP -1.86476 0.12028 -15.504 < 2e-16 ***
## VariableKitPviralDNA -0.30636 0.11654 -2.629 0.00987 **
## VariableKitZymoDNA -0.73256 0.11654 -6.286 7.77e-09 ***
## SpikeSetSecond 0.53492 0.08194 6.528 2.49e-09 ***
## SpikeSetThird 1.20649 0.08176 14.757 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3496 on 104 degrees of freedom
## Multiple R-squared: 0.8671, Adjusted R-squared: 0.8569
## F-statistic: 84.82 on 8 and 104 DF, p-value: < 2.2e-16
# Candidate 2: kit + qPCRefficiency
m_Salmonella.LogCopiespermLofMilk2 <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency,
  data = Salmonella.InnOnly
)
summary(m_Salmonella.LogCopiespermLofMilk2)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency,
## data = Salmonella.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.88308 -0.41227 0.08769 0.33440 0.89243
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.86150 0.20697 37.983 < 2e-16 ***
## VariableKitEZFood -1.73247 0.20215 -8.570 9.68e-14 ***
## VariableKitMastitis 0.05774 0.15006 0.385 0.70118
## VariableKitPfood -0.50093 0.15006 -3.338 0.00117 **
## VariableKitPSoilP -1.53819 0.15620 -9.847 < 2e-16 ***
## VariableKitPviralDNA -0.30636 0.14633 -2.094 0.03870 *
## VariableKitZymoDNA -0.40125 0.15006 -2.674 0.00869 **
## qPCRefficiency -2.83325 0.28417 -9.970 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.439 on 105 degrees of freedom
## Multiple R-squared: 0.7885, Adjusted R-squared: 0.7744
## F-statistic: 55.91 on 7 and 105 DF, p-value: < 2.2e-16
# Candidate 3: kit + qPCRefficiency + SpikeSet
m_Salmonella.LogCopiespermLofMilk3 <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Salmonella.InnOnly
)
summary(m_Salmonella.LogCopiespermLofMilk3)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency +
## SpikeSet, data = Salmonella.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.78830 -0.07483 -0.00161 0.10882 0.67589
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.55018 0.21269 35.499 < 2e-16 ***
## VariableKitEZFood -1.28789 0.12564 -10.251 < 2e-16 ***
## VariableKitMastitis 0.08887 0.08996 0.988 0.325513
## VariableKitPfood -0.46979 0.08996 -5.222 9.25e-07 ***
## VariableKitPSoilP -1.48765 0.09296 -16.002 < 2e-16 ***
## VariableKitPviralDNA -0.30636 0.08263 -3.707 0.000339 ***
## VariableKitZymoDNA -0.37012 0.08996 -4.114 7.84e-05 ***
## qPCRefficiency -3.09952 0.30413 -10.191 < 2e-16 ***
## SpikeSetSecond 0.85860 0.06621 12.967 < 2e-16 ***
## SpikeSetThird 0.57925 0.08455 6.851 5.49e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2479 on 103 degrees of freedom
## Multiple R-squared: 0.9338, Adjusted R-squared: 0.928
## F-statistic: 161.5 on 9 and 103 DF, p-value: < 2.2e-16
# Fit of model with both qPCRefficiency and SpikeSet is better than fit of model with SpikeSet only
# NOTE(review): anova() compares the fits sequentially (1 vs 2, then 2 vs 3).
# Models 1 and 2 each contain a term the other lacks (SpikeSet vs
# qPCRefficiency), so they are not nested; the recorded table shows a negative
# Df and Sum of Sq for that row. Only the 2-vs-3 row compares nested models.
# To test the statement above directly, consider comparing models 1 and 3
# on their own.
anova(m_Salmonella.LogCopiespermLofMilk1, m_Salmonella.LogCopiespermLofMilk2, m_Salmonella.LogCopiespermLofMilk3)
## Analysis of Variance Table
##
## Model 1: LogCopiespermLofMilk ~ VariableKit + SpikeSet
## Model 2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency
## Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 104 12.713
## 2 105 20.235 -1 -7.5219 122.39 < 2.2e-16 ***
## 3 103 6.330 2 13.9051 113.13 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# AIC of the three candidate lm fits; per the recorded values, model 3
# (kit + qPCRefficiency + SpikeSet) has the lowest AIC.
AIC(m_Salmonella.LogCopiespermLofMilk1)
## [1] 93.80425
AIC(m_Salmonella.LogCopiespermLofMilk2)
## [1] 144.3238
AIC(m_Salmonella.LogCopiespermLofMilk3)
## [1] 17.00478
# Final model chosen:
# Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
# m_Salmonella.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Salmonella.InnOnly )
library(lme4)
library(lmerTest)
# Mixed-model alternative: kit as fixed effect and a random intercept per
# SpikeSet, fitted by REML. Assigned with `<-`, matching the rest of the file.
model1 <- lmer(
  LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet),
  data = Salmonella.InnOnly,
  REML = TRUE
)
summary(model1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
## Data: Salmonella.InnOnly
##
## REML criterion at convergence: 106.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.2635 -0.6174 -0.0525 0.5580 1.8184
##
## Random effects:
## Groups Name Variance Std.Dev.
## SpikeSet (Intercept) 0.3621 0.6018
## Residual 0.1222 0.3496
## Number of obs: 113, groups: SpikeSet, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.0743 0.3571 2.1906 17.011 0.00228 **
## VariableKitEZFood -1.8392 0.1601 104.1022 -11.486 < 2e-16 ***
## VariableKitMastitis -0.2736 0.1165 104.0004 -2.347 0.02080 *
## VariableKitPfood -0.8322 0.1165 104.0004 -7.141 1.30e-10 ***
## VariableKitPSoilP -1.8655 0.1203 104.0052 -15.510 < 2e-16 ***
## VariableKitPviralDNA -0.3064 0.1165 104.0004 -2.629 0.00987 **
## VariableKitZymoDNA -0.7326 0.1165 104.0004 -6.286 7.77e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VarblKtEZFd -0.119
## VrblKtMstts -0.163 0.364
## VariblKtPfd -0.163 0.364 0.500
## VarblKtPSlP -0.158 0.357 0.484 0.484
## VrblKtPvDNA -0.163 0.364 0.500 0.500 0.484
## VrblKtZyDNA -0.163 0.364 0.500 0.500 0.484 0.500
# Mixed model with kit and qPCRefficiency as fixed effects and a random
# intercept per SpikeSet, fitted by REML. Assigned with `<-`, matching the
# rest of the file.
model2 <- lmer(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet),
  data = Salmonella.InnOnly,
  REML = TRUE
)
summary(model2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
## Data: Salmonella.InnOnly
##
## REML criterion at convergence: 33.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.2179 -0.3065 -0.0233 0.4196 2.7233
##
## Random effects:
## Groups Name Variance Std.Dev.
## SpikeSet (Intercept) 0.19217 0.4384
## Residual 0.06144 0.2479
## Number of obs: 113, groups: SpikeSet, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 8.02377 0.32161 4.95852 24.949 2.10e-06 ***
## VariableKitEZFood -1.29290 0.12534 103.84473 -10.315 < 2e-16 ***
## VariableKitMastitis 0.08782 0.08979 103.69083 0.978 0.330345
## VariableKitPfood -0.47085 0.08979 103.69083 -5.244 8.36e-07 ***
## VariableKitPSoilP -1.48885 0.09281 103.62123 -16.042 < 2e-16 ***
## VariableKitPviralDNA -0.30636 0.08263 103.04345 -3.708 0.000339 ***
## VariableKitZymoDNA -0.37117 0.08979 103.69083 -4.134 7.26e-05 ***
## qPCRefficiency -3.09049 0.30063 104.91698 -10.280 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA
## VarblKtEZFd 0.165
## VrblKtMstts 0.113 0.469
## VariblKtPfd 0.113 0.469 0.577
## VarblKtPSlP 0.118 0.464 0.564 0.564
## VrblKtPvDNA -0.128 0.330 0.460 0.460 0.445
## VrblKtZyDNA 0.113 0.469 0.577 0.577 0.564 0.460
## qPCReffcncy -0.590 -0.424 -0.391 -0.391 -0.395 0.000 -0.391
# Likelihood-ratio comparison of the two mixed models. As the recorded message
# shows, anova() refits both with ML instead of REML before comparing them.
anova(model1, model2)
## refitting model(s) with ML (instead of REML)
## Data: Salmonella.InnOnly
## Models:
## model1: LogCopiespermLofMilk ~ VariableKit + (1 | SpikeSet)
## model2: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + (1 | SpikeSet)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model1 9 107.958 132.504 -44.979 89.958
## model2 10 31.314 58.588 -5.657 11.314 78.643 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# NOTE(review): these AIC values are taken from the REML fits, which is why
# they differ from the AIC column of the ML-refitted anova(model1, model2)
# table. The two models have different fixed effects, so the ML-based values
# are presumably the ones to compare — confirm.
AIC (model1)
## [1] 124.5633
AIC (model2)
## [1] 53.32024
# Final linear model (same formula and data as candidate 3), stored under the
# name used by the diagnostics and emmeans steps that follow.
m_Salmonella.LogCopiespermLofMilk <- lm(
  formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Salmonella.InnOnly
)
summary(m_Salmonella.LogCopiespermLofMilk)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency +
## SpikeSet, data = Salmonella.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.78830 -0.07483 -0.00161 0.10882 0.67589
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.55018 0.21269 35.499 < 2e-16 ***
## VariableKitEZFood -1.28789 0.12564 -10.251 < 2e-16 ***
## VariableKitMastitis 0.08887 0.08996 0.988 0.325513
## VariableKitPfood -0.46979 0.08996 -5.222 9.25e-07 ***
## VariableKitPSoilP -1.48765 0.09296 -16.002 < 2e-16 ***
## VariableKitPviralDNA -0.30636 0.08263 -3.707 0.000339 ***
## VariableKitZymoDNA -0.37012 0.08996 -4.114 7.84e-05 ***
## qPCRefficiency -3.09952 0.30413 -10.191 < 2e-16 ***
## SpikeSetSecond 0.85860 0.06621 12.967 < 2e-16 ***
## SpikeSetThird 0.57925 0.08455 6.851 5.49e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2479 on 103 degrees of freedom
## Multiple R-squared: 0.9338, Adjusted R-squared: 0.928
## F-statistic: 161.5 on 9 and 103 DF, p-value: < 2.2e-16
# Residuals against predicted values, base graphics version
plot(
  x = predict(m_Salmonella.LogCopiespermLofMilk),
  y = resid(m_Salmonella.LogCopiespermLofMilk)
)
# Same diagnostic with ggplot2, coloured by kit, with reference lines at +/- 1
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2326B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")
ggplot(
  m_Salmonella.LogCopiespermLofMilk,
  aes(
    x = predict(m_Salmonella.LogCopiespermLofMilk),
    y = resid(m_Salmonella.LogCopiespermLofMilk),
    color = VariableKit
  )
) +
  geom_point() +
  geom_hline(yintercept = 1) +
  geom_hline(yintercept = -1) +
  scale_color_manual(values = Colors) +
  ggtitle("Salmonella Innoculated Only - Model Fit - Residuals vs Predicted") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line()
  )
# Normal Q-Q plot of the final model's residuals with its reference line,
# followed by the model summary
qqnorm(y = resid(m_Salmonella.LogCopiespermLofMilk))
qqline(y = resid(m_Salmonella.LogCopiespermLofMilk))
summary(object = m_Salmonella.LogCopiespermLofMilk)
##
## Call:
## lm(formula = LogCopiespermLofMilk ~ VariableKit + qPCRefficiency +
## SpikeSet, data = Salmonella.InnOnly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.78830 -0.07483 -0.00161 0.10882 0.67589
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.55018 0.21269 35.499 < 2e-16 ***
## VariableKitEZFood -1.28789 0.12564 -10.251 < 2e-16 ***
## VariableKitMastitis 0.08887 0.08996 0.988 0.325513
## VariableKitPfood -0.46979 0.08996 -5.222 9.25e-07 ***
## VariableKitPSoilP -1.48765 0.09296 -16.002 < 2e-16 ***
## VariableKitPviralDNA -0.30636 0.08263 -3.707 0.000339 ***
## VariableKitZymoDNA -0.37012 0.08996 -4.114 7.84e-05 ***
## qPCRefficiency -3.09952 0.30413 -10.191 < 2e-16 ***
## SpikeSetSecond 0.85860 0.06621 12.967 < 2e-16 ***
## SpikeSetThird 0.57925 0.08455 6.851 5.49e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2479 on 103 degrees of freedom
## Multiple R-squared: 0.9338, Adjusted R-squared: 0.928
## F-statistic: 161.5 on 9 and 103 DF, p-value: < 2.2e-16
# Only one residual exceeds 1 in absolute value, and it belongs to EZFood.
# Store the residuals next to the data, then count the large ones per kit.
Salmonella.InnOnly[["resid"]] <- resid(m_Salmonella.LogCopiespermLofMilk)
Salmonella.InnOnly %>%
  filter(abs(resid) > 1) %>%
  select(VariableKit, resid) %>%
  group_by(VariableKit) %>%
  summarize(n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 1 x 2
## VariableKit n
## <chr> <int>
## 1 EZFood 1
# Estimated marginal means per kit plus all pairwise kit contrasts
m_Salmonella.LogCopiespermLofMilk_emmeans <- emmeans(
  m_Salmonella.LogCopiespermLofMilk,
  specs = pairwise ~ VariableKit
)
# Compact letter display of the kit means, using capital letters as group labels
m_Salmonella.LogCopiespermLofMilk_cld <- CLD(
  m_Salmonella.LogCopiespermLofMilk_emmeans$emmeans,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
m_Salmonella.LogCopiespermLofMilk_cld
## VariableKit emmean SE df lower.CL upper.CL .group
## PSoilP 4.32 0.0630 103 4.19 4.44 A
## EZFood 4.52 0.1011 103 4.32 4.72 A
## Pfood 5.33 0.0591 103 5.22 5.45 B
## ZymoDNA 5.43 0.0591 103 5.32 5.55 B
## PviralDNA 5.50 0.0642 103 5.37 5.62 B
## COREDNA 5.80 0.0642 103 5.68 5.93 C
## Mastitis 5.89 0.0591 103 5.78 6.01 C
##
## Results are averaged over the levels of: SpikeSet
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 7 estimates
## significance level used: alpha = 0.05
# Kit-level estimated marginal means as a plain data frame
# (e.g. for plotting with other software)
data.frame(
  summary(
    emmeans(m_Salmonella.LogCopiespermLofMilk, ~VariableKit)
  )
)
## VariableKit emmean SE df lower.CL upper.CL
## 1 COREDNA 5.803758 0.06417945 103 5.676474 5.931043
## 2 EZFood 4.515871 0.10110663 103 4.315350 4.716392
## 3 Mastitis 5.892633 0.05912317 103 5.775376 6.009890
## 4 Pfood 5.333969 0.05912317 103 5.216712 5.451226
## 5 PSoilP 4.316106 0.06299397 103 4.191172 4.441039
## 6 PviralDNA 5.497394 0.06417945 103 5.370109 5.624679
## 7 ZymoDNA 5.433642 0.05912317 103 5.316386 5.550899
# Plot the kit-level estimated marginal means with their confidence limits and
# the compact-letter-display group labels next to each point
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2326B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")
data.frame(summary(emmeans(m_Salmonella.LogCopiespermLofMilk, ~VariableKit))) %>%
  ggplot(aes(x = VariableKit, y = emmean, color = VariableKit)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.5) +
  geom_text(
    data = data.frame(m_Salmonella.LogCopiespermLofMilk_cld),
    aes(x = VariableKit, label = `.group`),
    hjust = -0.1
  ) +
  scale_color_manual(values = Colors) +
  scale_x_discrete(
    limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA")
  ) +
  labs(
    title = "Salmonella Copy Numbers - Inoculated Milk Only",
    y = "Estimated Marginal Means"
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 25, vjust = 0.5)
  )
# from https://cran.r-project.org/web/packages/emmeans/vignettes/FAQs.html#contents
library(nlme)
# Final model chosen:
# Model 3: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
# m_Salmonella.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Salmonella.InnOnly )
# Generalized least squares fit with the same fixed effects as the final lm,
# but allowing a separate residual standard deviation for each kit
# (varIdent stratified by VariableKit). Assigned with `<-`, matching the rest
# of the file, and with the nlme:: prefix used consistently.
mod.Salmonella <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet,
  data = Salmonella.InnOnly,
  weights = nlme::varIdent(form = ~ 1 | VariableKit)
)
summary(mod.Salmonella)
## Generalized least squares fit by REML
## Model: LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet
## Data: Salmonella.InnOnly
## AIC BIC logLik
## -67.22324 -22.43285 50.61162
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | VariableKit
## Parameter estimates:
## COREDNA EZFood Mastitis Pfood PSoilP PviralDNA ZymoDNA
## 1.0000000 8.1183574 0.3580213 1.5164933 2.2489849 1.2699044 1.1815861
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 7.439099 0.0948369 78.44094 0.0000
## VariableKitEZFood -1.363513 0.3273561 -4.16523 0.0001
## VariableKitMastitis 0.061077 0.0305593 1.99864 0.0483
## VariableKitPfood -0.497587 0.0478911 -10.38997 0.0000
## VariableKitPSoilP -1.522134 0.0665349 -22.87723 0.0000
## VariableKitPviralDNA -0.306365 0.0404458 -7.57470 0.0000
## VariableKitZymoDNA -0.397914 0.0415666 -9.57292 0.0000
## qPCRefficiency -2.861799 0.1289834 -22.18735 0.0000
## SpikeSetSecond 0.784978 0.0194802 40.29625 0.0000
## SpikeSetThird 0.536250 0.0387801 13.82796 0.0000
##
## Correlation:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA VKZDNA qPCRff
## VariableKitEZFood 0.031
## VariableKitMastitis 0.257 0.091
## VariableKitPfood 0.164 0.058 0.583
## VariableKitPSoilP 0.121 0.042 0.422 0.269
## VariableKitPviralDNA -0.163 0.047 0.507 0.323 0.233
## VariableKitZymoDNA 0.189 0.067 0.672 0.429 0.310 0.372
## qPCRefficiency -0.958 -0.057 -0.494 -0.315 -0.231 0.000 -0.363
## SpikeSetSecond 0.178 0.040 0.140 0.090 0.066 0.000 0.103 -0.284
## SpikeSetThird -0.887 -0.040 -0.432 -0.276 -0.194 0.000 -0.318 0.876
## SpkStS
## VariableKitEZFood
## VariableKitMastitis
## VariableKitPfood
## VariableKitPSoilP
## VariableKitPviralDNA
## VariableKitZymoDNA
## qPCRefficiency
## SpikeSetSecond
## SpikeSetThird -0.018
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.1463772 -0.6589410 -0.1011567 0.6409211 3.0768720
##
## Residual standard error: 0.1061618
## Degrees of freedom: 113 total; 103 residual
# Compare the homoscedastic lm with the heteroscedastic gls fit by AIC.
# NOTE(review): mod.Salmonella was fitted by REML (see its summary), whereas
# AIC() on an lm object is presumably based on the ordinary (ML)
# log-likelihood, so the two values may not be directly comparable — confirm,
# e.g. by refitting the gls model with method = "ML" for this comparison.
AIC(m_Salmonella.LogCopiespermLofMilk3)
## [1] 17.00478
AIC(mod.Salmonella)
## [1] -67.22324
# Testing simpler model: kit as the only fixed effect, still with a separate
# residual standard deviation per kit. Assigned with `<-`, matching the rest
# of the file, and with the nlme:: prefix used consistently.
mod3.Salmonella <- nlme::gls(
  LogCopiespermLofMilk ~ VariableKit,
  data = Salmonella.InnOnly,
  weights = nlme::varIdent(form = ~ 1 | VariableKit)
)
summary(mod3.Salmonella)
## Generalized least squares fit by REML
## Model: LogCopiespermLofMilk ~ VariableKit
## Data: Salmonella.InnOnly
## AIC BIC logLik
## 194.3074 231.5955 -83.1537
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | VariableKit
## Parameter estimates:
## COREDNA EZFood Mastitis Pfood PSoilP PviralDNA ZymoDNA
## 1.0000000 4.3188465 2.3442877 2.7961189 2.9648645 0.7489734 2.3532494
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 6.074311 0.0608632 99.80266 0.0000
## VariableKitEZFood -2.243688 0.4258834 -5.26831 0.0000
## VariableKitMastitis -0.273567 0.1551199 -1.76358 0.0807
## VariableKitPfood -0.832231 0.1807369 -4.60465 0.0000
## VariableKitPSoilP -1.943009 0.2008415 -9.67434 0.0000
## VariableKitPviralDNA -0.306365 0.0760416 -4.02891 0.0001
## VariableKitZymoDNA -0.732557 0.1556217 -4.70730 0.0000
##
## Correlation:
## (Intr) VrKEZF VrblKM VrblKP VrKPSP VKPDNA
## VariableKitEZFood -0.143
## VariableKitMastitis -0.392 0.056
## VariableKitPfood -0.337 0.048 0.132
## VariableKitPSoilP -0.303 0.043 0.119 0.102
## VariableKitPviralDNA -0.800 0.114 0.314 0.270 0.243
## VariableKitZymoDNA -0.391 0.056 0.153 0.132 0.119 0.313
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.803309694 -0.926163281 0.002337255 0.888570595 1.895109185
##
## Residual standard error: 0.2582208
## Degrees of freedom: 113 total; 106 residual
# AIC of the homoscedastic lm and of the two heteroscedastic gls fits.
# NOTE(review): both gls models were fitted by REML and differ in their fixed
# effects; REML-based AIC values are presumably not comparable across
# different fixed-effect structures — confirm, e.g. with method = "ML" refits.
AIC(m_Salmonella.LogCopiespermLofMilk3)
## [1] 17.00478
AIC(mod.Salmonella)
## [1] -67.22324
AIC(mod3.Salmonella) #mod.Salmonella is best model (including qPCRefficiency and SpikeSet)
## [1] 194.3074
#mod.Salmonella not assuming homoscedasticity and including qPCRefficiency and SpikeSet is a much better fit than any of the alternatives
# Alias the selected fit; the figure code further down refers to it by this name.
mod.Salmonella.best <- mod.Salmonella
# Estimated marginal means per kit plus all pairwise kit contrasts (gls fit)
mod.Salmonella_emmeans <- emmeans(mod.Salmonella, specs = pairwise ~ VariableKit)
# Compact letter display, sorted and with the pairwise comparison details
mod.Salmonella_cld <- CLD(
  mod.Salmonella_emmeans$emmeans,
  sort = TRUE,
  details = TRUE,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
# Letters-only table, used to label the plots
mod.Salmonella_cld_letters <- CLD(
  mod.Salmonella_emmeans$emmeans,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Salmonella_cld
## $emmeans
## VariableKit emmean SE df lower.CL upper.CL .group
## PSoilP 4.302 0.059846 15.05 4.175 4.430 A
## EZFood 4.461 0.325949 6.03 3.664 5.258 ABC
## Pfood 5.327 0.038139 15.59 5.246 5.408 B
## ZymoDNA 5.427 0.029813 19.29 5.364 5.489 BC
## PviralDNA 5.518 0.033712 16.80 5.447 5.589 C
## COREDNA 5.825 0.027439 19.66 5.767 5.882 D
## Mastitis 5.886 0.009741 20.00 5.865 5.906 D
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: satterthwaite
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 7 estimates
## significance level used: alpha = 0.05
##
## $comparisons
## contrast estimate SE df t.ratio p.value
## EZFood - PSoilP 0.1586 0.3313 6.43 0.479 0.9983
## Pfood - PSoilP 1.0245 0.0707 24.98 14.483 <.0001
## Pfood - EZFood 0.8659 0.3281 6.18 2.639 0.2547
## ZymoDNA - PSoilP 1.1242 0.0666 22.11 16.874 <.0001
## ZymoDNA - EZFood 0.9656 0.3272 6.12 2.951 0.1818
## ZymoDNA - Pfood 0.0997 0.0481 31.09 2.072 0.3931
## PviralDNA - PSoilP 1.2158 0.0694 25.01 17.529 <.0001
## PviralDNA - EZFood 1.0571 0.3279 6.17 3.224 0.1335
## PviralDNA - Pfood 0.1912 0.0517 36.50 3.696 0.0116
## PviralDNA - ZymoDNA 0.0915 0.0459 34.29 1.992 0.4376
## COREDNA - PSoilP 1.5221 0.0665 22.13 22.877 <.0001
## COREDNA - EZFood 1.3635 0.3274 6.13 4.165 0.0486
## COREDNA - Pfood 0.4976 0.0479 31.21 10.390 <.0001
## COREDNA - ZymoDNA 0.3979 0.0416 37.15 9.573 <.0001
## COREDNA - PviralDNA 0.3064 0.0404 29.17 7.575 <.0001
## Mastitis - PSoilP 1.5832 0.0604 15.59 26.224 <.0001
## Mastitis - EZFood 1.4246 0.3260 6.03 4.370 0.0404
## Mastitis - Pfood 0.5587 0.0390 16.94 14.328 <.0001
## Mastitis - ZymoDNA 0.4590 0.0309 22.01 14.857 <.0001
## Mastitis - PviralDNA 0.3674 0.0363 21.07 10.123 <.0001
## Mastitis - COREDNA 0.0611 0.0306 25.49 1.999 0.4395
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: satterthwaite
## P value adjustment: tukey method for comparing a family of 7 estimates
# Kit-level estimated marginal means of the gls fit as a plain data frame
# (e.g. for plotting with other software)
data.frame(
  summary(
    emmeans(mod.Salmonella, ~VariableKit)
  )
)
## VariableKit emmean SE df lower.CL upper.CL
## 1 COREDNA 5.824509 0.027438825 19.661528 5.767209 5.881808
## 2 EZFood 4.460996 0.325948885 6.025485 3.664245 5.257747
## 3 Mastitis 5.885586 0.009740565 19.997508 5.865267 5.905904
## 4 Pfood 5.326922 0.038138735 15.588851 5.245897 5.407946
## 5 PSoilP 4.302375 0.059845536 15.053862 4.174857 4.429893
## 6 PviralDNA 5.518144 0.033711890 16.795573 5.446952 5.589336
## 7 ZymoDNA 5.426595 0.029812567 19.289759 5.364260 5.488930
# Kit-level estimates with confidence limits and t tests (infer = TRUE)
emmeans(mod.Salmonella, ~VariableKit) %>%
  summary(infer = TRUE)
## VariableKit emmean SE df lower.CL upper.CL t.ratio p.value
## COREDNA 5.825 0.027439 19.66 5.767 5.882 212.273 <.0001
## EZFood 4.461 0.325949 6.03 3.664 5.258 13.686 <.0001
## Mastitis 5.886 0.009741 20.00 5.865 5.906 604.234 <.0001
## Pfood 5.327 0.038139 15.59 5.246 5.408 139.672 <.0001
## PSoilP 4.302 0.059846 15.05 4.175 4.430 71.891 <.0001
## PviralDNA 5.518 0.033712 16.80 5.447 5.589 163.685 <.0001
## ZymoDNA 5.427 0.029813 19.29 5.364 5.489 182.024 <.0001
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: satterthwaite
## Confidence level used: 0.95
# Plot overlaying model estimates to raw data
# Raw observations for the overlay. Columns are selected by name instead of by
# position (previously c(25, 42, 4)) so the subset does not silently pick the
# wrong columns if the layout of the input table changes.
# NOTE(review): assumes positions 25, 42 and 4 were exactly these three
# columns; they are the only columns the overlay plots read from this frame.
mod.Salmonella_df1_Salmonella.rawdata <- Salmonella.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]
# Kit-level estimated marginal means (emmean, SE, df, lower.CL, upper.CL)
# from the gls fit, as a plain data frame for plotting.
mod.Salmonella_df2_Salmonella.model <- emmeans(mod.Salmonella, ~VariableKit) %>%
  summary() %>%
  data.frame()
# Quick unstyled overlay: jittered raw observations, then the model's kit
# estimates and their confidence limits on top
ggplot() +
  geom_jitter(
    data = mod.Salmonella_df1_Salmonella.rawdata,
    mapping = aes(x = VariableKit, y = LogCopiespermLofMilk)
  ) +
  geom_point(
    data = mod.Salmonella_df2_Salmonella.model,
    mapping = aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_errorbar(
    data = mod.Salmonella_df2_Salmonella.model,
    mapping = aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  )
# Making the plot pretty
# Styled overlay of raw observations (colour = kit, shape = SpikeSet) with the
# gls kit estimates, their confidence limits and the compact-letter labels.
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2326B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")
ggplot() +
  geom_jitter(
    data = mod.Salmonella_df1_Salmonella.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
  ) +
  # The colour scale is added once only; adding the identical scale a second
  # time made ggplot2 replace the first one and emit a
  # "Scale for 'colour' is already present" message.
  scale_color_manual(values = Colors) +
  geom_errorbar(
    data = mod.Salmonella_df2_Salmonella.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  ) +
  geom_point(
    data = mod.Salmonella_df2_Salmonella.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Salmonella_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 0.2,
    nudge_x = -0.05,
    fontface = "bold"
  ) +
  ylim(1.5, 7.0) +
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  theme_bw() +
  ggtitle("Salmonella Copy Numbers - Inoculated Milk Only - Not assuming homoscedasticity ") +
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line()) +
  theme(axis.text.x = element_text(angle = 25, vjust = 0.5)) +
  # Fixed left-to-right kit order on the x axis
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
### Salmonella Figure
library(emmeans)
# Estimated marginal means per kit plus all pairwise kit contrasts for the
# selected model, requesting the "df.error" degrees-of-freedom mode
mod.Salmonella.best_emmeans <- emmeans(
  mod.Salmonella.best,
  specs = pairwise ~ VariableKit,
  mode = "df.error"
)
# Compact letter display, sorted and with the pairwise comparison details
mod.Salmonella.best_cld <- CLD(
  mod.Salmonella.best_emmeans$emmeans,
  sort = TRUE,
  details = TRUE,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
# Letters-only table, used to label the figure
mod.Salmonella.best_cld_letters <- CLD(
  mod.Salmonella.best_emmeans$emmeans,
  Letters = LETTERS
)
## Warning: 'CLD' will be deprecated. Its use is discouraged.
## See '?cld.emmGrid' for an explanation. Use 'pwpp' or 'multcomp::cld' instead.
mod.Salmonella.best_cld_letters
## VariableKit emmean SE df lower.CL upper.CL .group
## PSoilP 4.302 0.059846 96 4.184 4.421 A
## EZFood 4.461 0.325949 96 3.814 5.108 AB
## Pfood 5.327 0.038139 96 5.251 5.403 B
## ZymoDNA 5.427 0.029813 96 5.367 5.486 BC
## PviralDNA 5.518 0.033712 96 5.451 5.585 C
## COREDNA 5.825 0.027439 96 5.770 5.879 D
## Mastitis 5.886 0.009741 96 5.866 5.905 D
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: df.error
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 7 estimates
## significance level used: alpha = 0.05
# Kit-level estimated marginal means of the selected model (df.error mode)
# as a plain data frame, e.g. for plotting with other software
data.frame(
  summary(
    emmeans(mod.Salmonella.best, ~VariableKit, mode = "df.error")
  )
)
## VariableKit emmean SE df lower.CL upper.CL
## 1 COREDNA 5.824509 0.027438825 96 5.770043 5.878974
## 2 EZFood 4.460996 0.325948885 96 3.813992 5.107999
## 3 Mastitis 5.885586 0.009740565 96 5.866251 5.904920
## 4 Pfood 5.326922 0.038138735 96 5.251217 5.402626
## 5 PSoilP 4.302375 0.059845536 96 4.183582 4.421167
## 6 PviralDNA 5.518144 0.033711890 96 5.451226 5.585062
## 7 ZymoDNA 5.426595 0.029812567 96 5.367418 5.485772
# Kit-level estimates with confidence limits and t tests (infer = TRUE)
emmeans(mod.Salmonella.best, ~VariableKit, mode = "df.error") %>%
  summary(infer = TRUE)
## VariableKit emmean SE df lower.CL upper.CL t.ratio p.value
## COREDNA 5.825 0.027439 96 5.770 5.879 212.273 <.0001
## EZFood 4.461 0.325949 96 3.814 5.108 13.686 <.0001
## Mastitis 5.886 0.009741 96 5.866 5.905 604.234 <.0001
## Pfood 5.327 0.038139 96 5.251 5.403 139.672 <.0001
## PSoilP 4.302 0.059846 96 4.184 4.421 71.891 <.0001
## PviralDNA 5.518 0.033712 96 5.451 5.585 163.685 <.0001
## ZymoDNA 5.427 0.029813 96 5.367 5.486 182.024 <.0001
##
## Results are averaged over the levels of: SpikeSet
## Degrees-of-freedom method: df.error
## Confidence level used: 0.95
# Plot overlaying model estimates to raw data
# Raw observations for the overlay. Columns are selected by name instead of by
# position (previously c(25, 42, 4)) so the subset does not silently pick the
# wrong columns if the layout of the input table changes.
# NOTE(review): assumes positions 25, 42 and 4 were exactly these three
# columns; they are the only columns the overlay plots read from this frame.
mod_df1_Salmonella.rawdata <- Salmonella.InnOnly[
  c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")
]
# Kit-level estimated marginal means (emmean, SE, df, lower.CL, upper.CL)
# from the selected model, as a plain data frame for plotting.
mod_df2_Salmonella.best.model <- emmeans(
  mod.Salmonella.best,
  ~VariableKit,
  mode = "df.error"
) %>%
  summary() %>%
  data.frame()
# Quick unstyled overlay: jittered raw observations, then the selected model's
# kit estimates and their confidence limits on top
ggplot() +
  geom_jitter(
    data = mod_df1_Salmonella.rawdata,
    mapping = aes(x = VariableKit, y = LogCopiespermLofMilk)
  ) +
  geom_point(
    data = mod_df2_Salmonella.best.model,
    mapping = aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_errorbar(
    data = mod_df2_Salmonella.best.model,
    mapping = aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.5
  )
# Making the plot pretty
# Styled overlay of raw observations (colour = kit, shape = SpikeSet) with the
# selected model's kit estimates, confidence limits and compact-letter labels.
Colors <- c("COREDNA" = "#4DB3C7", "EZFood"= "#85CA46", "Mastitis"= "#F49D00","Pfood"= "#D2326B", "PSoilP"="#1D6E9B", "PviralDNA"= "#6850B4", "ZymoDNA"="#165F05")
ggplot() +
  geom_jitter(
    data = mod_df1_Salmonella.rawdata,
    aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)
  ) +
  # The colour scale is added once only; adding the identical scale a second
  # time made ggplot2 replace the first one and emit a
  # "Scale for 'colour' is already present" message.
  scale_color_manual(values = Colors) +
  geom_errorbar(
    data = mod_df2_Salmonella.best.model,
    aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL),
    width = 0.3
  ) +
  geom_point(
    data = mod_df2_Salmonella.best.model,
    aes(x = VariableKit, y = emmean, fill = VariableKit)
  ) +
  geom_text(
    data = data.frame(mod.Salmonella.best_cld_letters),
    aes(x = VariableKit, label = `.group`, y = emmean),
    nudge_y = 0.2,
    nudge_x = -0.05,
    fontface = "bold"
  ) +
  #ylim(3.5, 6.5)+
  xlab("Kit") +
  ylab("Log10 Copies / mL of Milk") +
  theme_bw() +
  ggtitle("Salmonella Copy Numbers - Inoculated Milk Only - Not assuming homoscedasticity ") +
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line()) +
  theme(axis.text.x = element_text(angle = 25, vjust = 0.5)) +
  # Fixed left-to-right kit order on the x axis
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
The model that does not assume homoscedasticity and includes VariableKit + SpikeSet + qPCRefficiency was chosen.
qPCRefficiency is forced into all final models.
Formula: mod.Salmonella = nlme::gls(LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Salmonella.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod.Salmonella)
-67.22324 # best model
Other Models for Reference:
Formula: mod3.Salmonella = nlme::gls(LogCopiespermLofMilk ~ VariableKit, data=Salmonella.InnOnly, weights = varIdent(form = ~1 | VariableKit))
AIC(mod3.Salmonella)
194.3074
Previously chosen Linear Model that assumed homoscedasticity for reference:
Formula: m_Salmonella.LogCopiespermLofMilk3 <- lm( LogCopiespermLofMilk ~ VariableKit + qPCRefficiency + SpikeSet, data=Salmonella.InnOnly )
AIC(m_Salmonella.LogCopiespermLofMilk3)
17.00478
Manuscript Figures: Salmonella
# Salmonella: Milk Data and Controls
# Keep every Salmonella row whose sample type is not "NP40InoculatedMilk",
# then report the dimensions of the resulting table.
Salmonella.Inn.Ctrl <- filter(Salmonella, VariableSampleType != "NP40InoculatedMilk")
dim(Salmonella.Inn.Ctrl)
## [1] 240 42
# Named vector mapping each kit to its plotting colour.
Colors <- c("COREDNA" = "#4DB3C7", "EZFood" = "#85CA46", "Mastitis" = "#F49D00", "Pfood" = "#D2338B", "PSoilP" = "#1D6E9B", "PviralDNA" = "#6850B4", "ZymoDNA" = "#165F05")
# Set the factor level order, which is the left-to-right order of the facets.
Salmonella.Inn.Ctrl$VariableSampleType <- factor(Salmonella.Inn.Ctrl$VariableSampleType, levels = c('InoculatedMilk', 'UninoculatedMilk', 'NoTemplateControl', 'MockCommunity'))
# One panel per sample type; points coloured by kit and shaped by spike set.
ggplot(data = Salmonella.Inn.Ctrl, aes(VariableKit, LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet)) +
  # Hollow point shapes. FALSE is spelled out because F is an ordinary
  # variable that can be reassigned, whereas FALSE is a reserved word.
  scale_shape_discrete(solid = FALSE) +
  ylab("Salmonella Log10 Copies / mL of Milk") +
  xlab("Kit") +
  # jitter.width = 0: points are only dodged apart, not randomly jittered.
  geom_point(aes(colour = VariableKit), size = 2, stroke = .5, position = position_jitterdodge(jitter.width = 0, dodge.width = 1), show.legend = FALSE) +
  facet_wrap(vars(VariableSampleType), nrow = 1) +
  ggtitle("Salmonella DNA Copy Numbers - All Samples and Controls") +
  theme_bw() +
  ylim(0, 9) +
  scale_color_manual(values = Colors) +
  # Strip grid, background and border; keep only the axis lines.
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line()) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  # Explicit left-to-right kit order on the x axis.
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
## Warning: Removed 100 rows containing missing values (geom_point).
# Save the most recently displayed plot as a 9 x 3 inch TIFF at 600 dpi.
ggsave(
  filename = "Salmonella-AllSamples.TIFF",
  plot = last_plot(),
  width = 9,
  height = 3,
  units = "in",
  dpi = 600
)
## Warning: Removed 100 rows containing missing values (geom_point).
# Raw copy numbers for the inoculated-milk subset: one column of points per
# kit, shaped by spike set and coloured by kit.
ggplot(Salmonella.InnOnly, aes(VariableKit, LogCopiespermLofMilk, shape = factor(SpikeSet))) +
  # Hollow point shapes. FALSE is spelled out because F is an ordinary
  # variable that can be reassigned, whereas FALSE is a reserved word.
  scale_shape_discrete(solid = FALSE) +
  # jitter.width = 0: points are only dodged apart, not randomly jittered.
  geom_point(aes(colour = VariableKit), size = 2, stroke = 1, position = position_jitterdodge(jitter.width = 0, dodge.width = 1)) +
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Salmonella DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  scale_color_manual(values = Colors) +
  # Strip grid, background and border; keep only the axis lines.
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line()) +
  theme(axis.text.x = element_text(angle = 25, vjust = 0.5)) +
  # Explicit left-to-right kit order on the x axis.
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
# Plot overlaying model estimates to raw data
# Raw observations: pick the plotting columns by name instead of by position
# (previously c(25,42,4)), so the subset does not silently change if the
# column layout of the input table changes. The plots built from this data
# frame reference exactly these three columns.
mod_df1_Salmonella.rawdata <- Salmonella.InnOnly[c("VariableKit", "LogCopiespermLofMilk", "SpikeSet")]
# Model estimates: per-kit estimated marginal means with their confidence
# limits, converted to a plain data frame for ggplot.
mod_df2_Salmonella.best.model <- emmeans(mod.Salmonella.best, ~ VariableKit, mode = "df.error") %>%
  summary() %>%
  data.frame()
# Manuscript figure: jittered raw observations overlaid with the per-kit model
# estimate, its confidence limits and the grouping letters.
ggplot() +
  geom_jitter(data = mod_df1_Salmonella.rawdata, aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet), size = 2, stroke = 1, width = .2) +
  # Hollow point shapes. FALSE is spelled out because F is an ordinary
  # variable that can be reassigned, whereas FALSE is a reserved word.
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  geom_errorbar(data = mod_df2_Salmonella.best.model, aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL), width = 0.3) +
  geom_point(data = mod_df2_Salmonella.best.model, aes(x = VariableKit, y = emmean, fill = VariableKit)) +
  # Grouping letters, placed 2 units above each estimate.
  geom_text(data = data.frame(mod.Salmonella.best_cld_letters), aes(x = VariableKit, label = `.group`, y = emmean), nudge_y = 2, nudge_x = -0.05, fontface = "bold") +
  #ylim(3.5, 6.5)+
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Salmonella DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  # Strip grid, background and border; keep only the axis lines.
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line()) +
  # Explicit left-to-right kit order on the x axis.
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
# ggsave() writes the last plot displayed, i.e. the figure built just above.
ggsave("Salmonella-Model-Jitter.TIFF", width = 7.5, height = 3.5, units = "in", dpi = 600)
# Manuscript figure: raw observations dodged apart (no random jitter) and
# overlaid with the per-kit model estimate, its confidence limits and the
# grouping letters.
ggplot() +
  geom_point(data = mod_df1_Salmonella.rawdata, aes(x = VariableKit, y = LogCopiespermLofMilk, color = VariableKit, shape = SpikeSet), size = 2, stroke = 1, position = position_jitterdodge(jitter.width = 0, dodge.width = .5)) +
  # Hollow point shapes. FALSE is spelled out because F is an ordinary
  # variable that can be reassigned, whereas FALSE is a reserved word.
  scale_shape_discrete(solid = FALSE) +
  scale_color_manual(values = Colors) +
  geom_errorbar(data = mod_df2_Salmonella.best.model, aes(x = VariableKit, ymin = lower.CL, ymax = upper.CL), width = 0.3) +
  geom_point(data = mod_df2_Salmonella.best.model, aes(x = VariableKit, y = emmean, fill = VariableKit)) +
  # Grouping letters, placed 2 units above each estimate.
  geom_text(data = data.frame(mod.Salmonella.best_cld_letters), aes(x = VariableKit, label = `.group`, y = emmean), nudge_y = 2, nudge_x = -0.05, fontface = "bold") +
  #ylim(3.5, 6.5)+
  ylab("Log10 Copies / mL of Milk") +
  xlab("Kit") +
  ggtitle("Salmonella DNA Copy Numbers - Inoculated Milk Only") +
  theme_bw() +
  # Strip grid, background and border; keep only the axis lines.
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  theme(panel.background = element_blank()) +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line()) +
  # Explicit left-to-right kit order on the x axis.
  scale_x_discrete(limits = c("COREDNA", "Mastitis", "EZFood", "Pfood", "PSoilP", "PviralDNA", "ZymoDNA"))
# ggsave() writes the last plot displayed, i.e. the figure built just above.
ggsave("Salmonella-Model.TIFF", width = 7.5, height = 3.5, units = "in", dpi = 600)
# Record the R version, platform and package versions used for this analysis.
utils::sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] lmerTest_3.1-3 lme4_1.1-25 Matrix_1.2-18 nlme_3.1-150
## [5] multcompView_0.1-8 emmeans_1.5.2-1 dplyr_1.0.2 ggplot2_3.3.2
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.5 nloptr_1.2.2.2 plyr_1.8.6
## [4] pillar_1.4.7 compiler_4.0.3 tools_4.0.3
## [7] boot_1.3-25 statmod_1.4.35 digest_0.6.27
## [10] lattice_0.20-41 evaluate_0.14 lifecycle_0.2.0
## [13] tibble_3.0.4 gtable_0.3.0 pkgconfig_2.0.3
## [16] rlang_0.4.8 cli_2.2.0 yaml_2.2.1
## [19] mvtnorm_1.1-1 xfun_0.19 withr_2.3.0
## [22] stringr_1.4.0 knitr_1.30 generics_0.1.0
## [25] vctrs_0.3.5 grid_4.0.3 tidyselect_1.1.0
## [28] glue_1.4.2 R6_2.5.0 fansi_0.4.1
## [31] rmarkdown_2.5 minqa_1.2.4 purrr_0.3.4
## [34] farver_2.0.3 magrittr_2.0.1 MASS_7.3-53
## [37] splines_4.0.3 scales_1.1.1 ellipsis_0.3.1
## [40] htmltools_0.5.0 assertthat_0.2.1 colorspace_2.0-0
## [43] xtable_1.8-4 numDeriv_2016.8-1.1 labeling_0.4.2
## [46] utf8_1.1.4 stringi_1.5.3 estimability_1.3
## [49] munsell_0.5.0 crayon_1.3.4